1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Signed 32-bit remainder of two values that arrive in vector registers
; (v0 = %num, v1 = %den in the expected output). The two RUN configurations
; are checked under separate prefixes (GISEL and CGP): the CGP expectations
; use v_rcp_f32 where the GISEL ones use v_rcp_iflag_f32, and additionally
; contain multiplies by the constant 0 whose results are added back in.
7 define i32 @v_srem_i32(i32 %num, i32 %den) {
8 ; GISEL-LABEL: v_srem_i32:
10 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
12 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
13 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
14 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
15 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
16 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
17 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
18 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
19 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
20 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
21 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
22 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
23 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
24 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
25 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
26 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
27 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
28 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
29 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
30 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
31 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
32 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
33 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
34 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
35 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
36 ; GISEL-NEXT: s_setpc_b64 s[30:31]
38 ; CGP-LABEL: v_srem_i32:
40 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
41 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
42 ; CGP-NEXT: v_ashrrev_i32_e32 v3, 31, v1
43 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
44 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v3
45 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
46 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v3
47 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
48 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
49 ; CGP-NEXT: v_rcp_f32_e32 v3, v3
50 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
51 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
52 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
53 ; CGP-NEXT: v_mul_lo_u32 v5, 0, v4
54 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
55 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
56 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
57 ; CGP-NEXT: v_mul_lo_u32 v4, 0, v3
58 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
59 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3
60 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v1
61 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
62 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
63 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
64 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
65 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
66 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
67 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
68 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
69 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
70 ; CGP-NEXT: s_setpc_b64 s[30:31]
71 %result = srem i32 %num, %den
75 ; FIXME: This is a workaround for not handling the uniform VGPR case.
; @s_srem_i32 passes its srem result through this intrinsic.
76 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Signed 32-bit remainder in an amdgpu_ps function whose operands are marked
; inreg (s0 = %num, s1 = %den in the expected output). The sign handling is
; expected on the scalar unit (s_ashr/s_add/s_xor), the reciprocal-based
; core on the vector unit, and the result is moved back to s0 with
; v_readfirstlane_b32 via the readfirstlane intrinsic call.
78 define amdgpu_ps i32 @s_srem_i32(i32 inreg %num, i32 inreg %den) {
79 ; GISEL-LABEL: s_srem_i32:
81 ; GISEL-NEXT: s_ashr_i32 s2, s0, 31
82 ; GISEL-NEXT: s_ashr_i32 s3, s1, 31
83 ; GISEL-NEXT: s_add_i32 s0, s0, s2
84 ; GISEL-NEXT: s_add_i32 s1, s1, s3
85 ; GISEL-NEXT: s_xor_b32 s0, s0, s2
86 ; GISEL-NEXT: s_xor_b32 s1, s1, s3
87 ; GISEL-NEXT: v_cvt_f32_u32_e32 v0, s1
88 ; GISEL-NEXT: s_sub_i32 s3, 0, s1
89 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v0, v0
90 ; GISEL-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
91 ; GISEL-NEXT: v_cvt_u32_f32_e32 v0, v0
92 ; GISEL-NEXT: v_mul_lo_u32 v1, s3, v0
93 ; GISEL-NEXT: v_mul_hi_u32 v1, v0, v1
94 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
95 ; GISEL-NEXT: v_mul_hi_u32 v0, s0, v0
96 ; GISEL-NEXT: v_mul_lo_u32 v0, v0, s1
97 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
98 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
99 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
100 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
101 ; GISEL-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
102 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
103 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
104 ; GISEL-NEXT: v_xor_b32_e32 v0, s2, v0
105 ; GISEL-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
106 ; GISEL-NEXT: v_readfirstlane_b32 s0, v0
107 ; GISEL-NEXT: ; return to shader part epilog
109 ; CGP-LABEL: s_srem_i32:
111 ; CGP-NEXT: s_ashr_i32 s2, s0, 31
112 ; CGP-NEXT: s_ashr_i32 s3, s1, 31
113 ; CGP-NEXT: s_add_i32 s0, s0, s2
114 ; CGP-NEXT: s_add_i32 s1, s1, s3
115 ; CGP-NEXT: s_xor_b32 s0, s0, s2
116 ; CGP-NEXT: s_xor_b32 s1, s1, s3
117 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, s1
118 ; CGP-NEXT: s_sub_i32 s3, 0, s1
119 ; CGP-NEXT: v_rcp_f32_e32 v0, v0
120 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
121 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
122 ; CGP-NEXT: v_mul_lo_u32 v1, s3, v0
123 ; CGP-NEXT: v_mul_lo_u32 v2, 0, v1
124 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
125 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v2, v1
126 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
127 ; CGP-NEXT: v_mul_lo_u32 v1, 0, v0
128 ; CGP-NEXT: v_mul_hi_u32 v0, s0, v0
129 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
130 ; CGP-NEXT: v_mul_lo_u32 v0, v0, s1
131 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
132 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
133 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
134 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
135 ; CGP-NEXT: v_subrev_i32_e32 v1, vcc, s1, v0
136 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s1, v0
137 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
138 ; CGP-NEXT: v_xor_b32_e32 v0, s2, v0
139 ; CGP-NEXT: v_subrev_i32_e32 v0, vcc, s2, v0
140 ; CGP-NEXT: v_readfirstlane_b32 s0, v0
141 ; CGP-NEXT: ; return to shader part epilog
142 %result = srem i32 %num, %den
143 %readlane = call i32 @llvm.amdgcn.readfirstlane(i32 %result)
; Two-element vector form of the signed 32-bit remainder. In the expected
; output the numerator lanes are in v0/v1 and the denominator lanes in v2/v3,
; and the per-lane instruction sequences are interleaved. As in the scalar
; test, the CGP expectations use v_rcp_f32 and carry extra multiplies by 0.
147 define <2 x i32> @v_srem_v2i32(<2 x i32> %num, <2 x i32> %den) {
148 ; GISEL-LABEL: v_srem_v2i32:
150 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
152 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
153 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
154 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
155 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
156 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
157 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
158 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
159 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
160 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
161 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
162 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
163 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
164 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
165 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
166 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
167 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
168 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
169 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
170 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
171 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
172 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
173 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
174 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
175 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
176 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
177 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
178 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
179 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
180 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
181 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
182 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
183 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
184 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
185 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
186 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
187 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
188 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
189 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
190 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
191 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
192 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
193 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
194 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
195 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
196 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
197 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
198 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
199 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
200 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
201 ; GISEL-NEXT: s_setpc_b64 s[30:31]
203 ; CGP-LABEL: v_srem_v2i32:
205 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
207 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v2
208 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
209 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
210 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
211 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
212 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
213 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
214 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
215 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v5
216 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
217 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
218 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
219 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
220 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
221 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
222 ; CGP-NEXT: v_rcp_f32_e32 v5, v5
223 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
224 ; CGP-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
225 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
226 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
227 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
228 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v5
229 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
230 ; CGP-NEXT: v_mul_lo_u32 v10, 0, v7
231 ; CGP-NEXT: v_mul_hi_u32 v7, v5, v7
232 ; CGP-NEXT: v_mul_lo_u32 v11, 0, v9
233 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
234 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7
235 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
236 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v7
237 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
238 ; CGP-NEXT: v_mul_lo_u32 v8, 0, v5
239 ; CGP-NEXT: v_mul_hi_u32 v5, v0, v5
240 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
241 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
242 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
243 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
244 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
245 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
246 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
247 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
248 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
249 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
250 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
251 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
252 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
253 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
254 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
255 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
256 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
257 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
258 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
259 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
260 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
261 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
262 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
263 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
264 ; CGP-NEXT: s_setpc_b64 s[30:31]
265 %result = srem <2 x i32> %num, %den
266 ret <2 x i32> %result
; Signed 32-bit remainder by the constant 4096 (0x1000). Both RUN
; configurations share the common CHECK prefix here. The expected output
; still goes through the reciprocal-based sequence (v_rcp_iflag_f32 on the
; literal 0x45800000), with the multiply by the denominator appearing as a
; left shift by 12.
269 define i32 @v_srem_i32_pow2k_denom(i32 %num) {
270 ; CHECK-LABEL: v_srem_i32_pow2k_denom:
272 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; CHECK-NEXT: s_movk_i32 s4, 0x1000
274 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
275 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x45800000
276 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xfffff000
277 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
278 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
279 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
280 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
281 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
282 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
283 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
284 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
285 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 12, v2
286 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
287 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
288 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
289 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
290 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
291 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
292 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
293 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
294 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
295 ; CHECK-NEXT: s_setpc_b64 s[30:31]
296 %result = srem i32 %num, 4096
; Two-element vector remainder by a splat of the constant 4096. Unlike the
; scalar constant test, the two configurations diverge and are checked under
; separate prefixes: the GISEL expectations materialize the denominator with
; "s_add_i32 s4, 0x1000, 0" and convert it at run time, while the CGP
; expectations take the reciprocal of the literal 0x45800000 and use a left
; shift by 12 for the multiply by the denominator.
300 define <2 x i32> @v_srem_v2i32_pow2k_denom(<2 x i32> %num) {
301 ; GISEL-LABEL: v_srem_v2i32_pow2k_denom:
303 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
305 ; GISEL-NEXT: s_add_i32 s4, 0x1000, 0
306 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
307 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
308 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
309 ; GISEL-NEXT: s_sub_i32 s5, 0, s4
310 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
311 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
312 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
313 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
314 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
315 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
316 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
317 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
318 ; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5
319 ; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4
320 ; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
321 ; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
322 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
323 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
324 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
325 ; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
326 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4
327 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
328 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
329 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
330 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
331 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
332 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
333 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
334 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
335 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
336 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
337 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
338 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
339 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
340 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
341 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
342 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
343 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
344 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
345 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
346 ; GISEL-NEXT: s_setpc_b64 s[30:31]
348 ; CGP-LABEL: v_srem_v2i32_pow2k_denom:
350 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; CGP-NEXT: s_movk_i32 s4, 0x1000
352 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
353 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x45800000
354 ; CGP-NEXT: s_movk_i32 s5, 0xf000
355 ; CGP-NEXT: v_mov_b32_e32 v4, 0xfffff000
356 ; CGP-NEXT: v_mov_b32_e32 v5, 0x1000
357 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
358 ; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x45800000
359 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
360 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
361 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
362 ; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
363 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
364 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
365 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
366 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
367 ; CGP-NEXT: v_mul_lo_u32 v8, s5, v3
368 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
369 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
370 ; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
371 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
372 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
373 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
374 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
375 ; CGP-NEXT: v_lshlrev_b32_e32 v3, 12, v3
376 ; CGP-NEXT: v_lshlrev_b32_e32 v4, 12, v4
377 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
378 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
379 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
380 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
381 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
382 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
383 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
384 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
385 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
386 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
387 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
388 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
389 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
390 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
391 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
392 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
393 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
394 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
395 ; CGP-NEXT: s_setpc_b64 s[30:31]
396 %result = srem <2 x i32> %num, <i32 4096, i32 4096>
397 ret <2 x i32> %result
; Signed 32-bit remainder by the odd constant 1235195 (0x12d8fb). Both RUN
; configurations share the common CHECK prefix. The expected output keeps
; the reciprocal-based sequence, with the denominator held in s4 and its
; negation (0xffed2705) materialized in a VGPR.
400 define i32 @v_srem_i32_oddk_denom(i32 %num) {
401 ; CHECK-LABEL: v_srem_i32_oddk_denom:
403 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
405 ; CHECK-NEXT: v_ashrrev_i32_e32 v1, 31, v0
406 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v2, 0x4996c7d8
407 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xffed2705
408 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
409 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
410 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
411 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
412 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v2
413 ; CHECK-NEXT: v_mul_hi_u32 v3, v2, v3
414 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
415 ; CHECK-NEXT: v_mul_hi_u32 v2, v0, v2
416 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, s4
417 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
418 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
419 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
420 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
421 ; CHECK-NEXT: v_subrev_i32_e32 v2, vcc, s4, v0
422 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
423 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
424 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v1
425 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
426 ; CHECK-NEXT: s_setpc_b64 s[30:31]
427 %result = srem i32 %num, 1235195
; Two-element vector remainder by a splat of the odd constant 1235195
; (0x12d8fb). The configurations are checked under separate prefixes: the
; GISEL expectations materialize the denominator with
; "s_add_i32 s4, 0x12d8fb, 0" and convert it to float at run time, while the
; CGP expectations take the reciprocal of the literal 0x4996c7d8 once per
; lane.
431 define <2 x i32> @v_srem_v2i32_oddk_denom(<2 x i32> %num) {
432 ; GISEL-LABEL: v_srem_v2i32_oddk_denom:
434 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
436 ; GISEL-NEXT: s_add_i32 s4, 0x12d8fb, 0
437 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
438 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
439 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s4
440 ; GISEL-NEXT: s_sub_i32 s5, 0, s4
441 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
442 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
443 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
444 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
445 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v4
446 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
447 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
448 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
449 ; GISEL-NEXT: v_mul_lo_u32 v6, s5, v5
450 ; GISEL-NEXT: v_mul_lo_u32 v7, s5, v4
451 ; GISEL-NEXT: v_mul_hi_u32 v6, v5, v6
452 ; GISEL-NEXT: v_mul_hi_u32 v7, v4, v7
453 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
454 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
455 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
456 ; GISEL-NEXT: v_mul_hi_u32 v4, v1, v4
457 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, s4
458 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, s4
459 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
460 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
461 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
462 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
463 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
464 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
465 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
466 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
467 ; GISEL-NEXT: v_subrev_i32_e32 v4, vcc, s4, v0
468 ; GISEL-NEXT: v_subrev_i32_e32 v5, vcc, s4, v1
469 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
470 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
471 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s4, v1
472 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
473 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
474 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
475 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
476 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
477 ; GISEL-NEXT: s_setpc_b64 s[30:31]
479 ; CGP-LABEL: v_srem_v2i32_oddk_denom:
481 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
482 ; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
483 ; CGP-NEXT: v_ashrrev_i32_e32 v2, 31, v0
484 ; CGP-NEXT: v_rcp_iflag_f32_e32 v3, 0x4996c7d8
485 ; CGP-NEXT: s_mov_b32 s5, 0xffed2705
486 ; CGP-NEXT: v_mov_b32_e32 v4, 0xffed2705
487 ; CGP-NEXT: v_mov_b32_e32 v5, 0x12d8fb
488 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v1
489 ; CGP-NEXT: v_rcp_iflag_f32_e32 v7, 0x4996c7d8
490 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
491 ; CGP-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
492 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v6
493 ; CGP-NEXT: v_mul_f32_e32 v7, 0x4f7ffffe, v7
494 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
495 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
496 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
497 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
498 ; CGP-NEXT: v_mul_lo_u32 v8, s5, v3
499 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v7
500 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
501 ; CGP-NEXT: v_mul_hi_u32 v4, v7, v4
502 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
503 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v7, v4
504 ; CGP-NEXT: v_mul_hi_u32 v3, v0, v3
505 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
506 ; CGP-NEXT: v_mul_lo_u32 v3, v3, s4
507 ; CGP-NEXT: v_mul_lo_u32 v4, v4, s4
508 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
509 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v4
510 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
511 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
512 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
513 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
514 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
515 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
516 ; CGP-NEXT: v_subrev_i32_e32 v3, vcc, s4, v0
517 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
518 ; CGP-NEXT: v_cmp_le_u32_e32 vcc, s4, v0
519 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
520 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
521 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc
522 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v2
523 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v6
524 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
525 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
526 ; CGP-NEXT: s_setpc_b64 s[30:31]
527 %result = srem <2 x i32> %num, <i32 1235195, i32 1235195>
528 ret <2 x i32> %result
; Signed 32-bit remainder where the denominator is computed as
; "shl i32 4096, %y", i.e. it is not a compile-time constant. Both RUN
; configurations share the common CHECK prefix; the expected output is the
; shift (v_lshl_b32 with the literal 0x1000) followed by the same general
; sequence expected for a variable denominator.
531 define i32 @v_srem_i32_pow2_shl_denom(i32 %x, i32 %y) {
532 ; CHECK-LABEL: v_srem_i32_pow2_shl_denom:
534 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535 ; CHECK-NEXT: v_lshl_b32_e32 v1, 0x1000, v1
536 ; CHECK-NEXT: v_ashrrev_i32_e32 v2, 31, v0
537 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v1
538 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
539 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v3
540 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
541 ; CHECK-NEXT: v_xor_b32_e32 v1, v1, v3
542 ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, v1
543 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
544 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
545 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
546 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
547 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, v3
548 ; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
549 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
550 ; CHECK-NEXT: v_mul_hi_u32 v3, v0, v3
551 ; CHECK-NEXT: v_mul_lo_u32 v3, v3, v1
552 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
553 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
554 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
555 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
556 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
557 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
558 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
559 ; CHECK-NEXT: v_xor_b32_e32 v0, v0, v2
560 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
561 ; CHECK-NEXT: s_setpc_b64 s[30:31]
562 %shl.y = shl i32 4096, %y
563 %r = srem i32 %x, %shl.y
; Two-element vector remainder where each denominator lane is 4096 shifted
; left by the matching lane of %y. The configurations are checked under
; separate prefixes; as in the plain vector test, the CGP expectations use
; v_rcp_f32 and carry extra multiplies by 0, while the GISEL expectations
; use v_rcp_iflag_f32.
567 define <2 x i32> @v_srem_v2i32_pow2_shl_denom(<2 x i32> %x, <2 x i32> %y) {
568 ; GISEL-LABEL: v_srem_v2i32_pow2_shl_denom:
570 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GISEL-NEXT: s_movk_i32 s4, 0x1000
572 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
573 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v1
574 ; GISEL-NEXT: v_lshl_b32_e32 v2, s4, v2
575 ; GISEL-NEXT: v_lshl_b32_e32 v3, s4, v3
576 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
577 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
578 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v2
579 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
580 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
581 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
582 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
583 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
584 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v6
585 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
586 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v2
587 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
588 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
589 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
590 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
591 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
592 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
593 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
594 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
595 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
596 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v6
597 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
598 ; GISEL-NEXT: v_mul_hi_u32 v7, v6, v7
599 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
600 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v7
601 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
602 ; GISEL-NEXT: v_mul_hi_u32 v6, v0, v6
603 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
604 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v2
605 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
606 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
607 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
608 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
609 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
610 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
611 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
612 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
613 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
614 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
615 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
616 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
617 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
618 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
619 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
620 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
621 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v5
622 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
623 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
624 ; GISEL-NEXT: s_setpc_b64 s[30:31]
626 ; CGP-LABEL: v_srem_v2i32_pow2_shl_denom:
628 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629 ; CGP-NEXT: s_movk_i32 s4, 0x1000
630 ; CGP-NEXT: v_ashrrev_i32_e32 v4, 31, v0
631 ; CGP-NEXT: v_ashrrev_i32_e32 v5, 31, v1
632 ; CGP-NEXT: v_lshl_b32_e32 v2, s4, v2
633 ; CGP-NEXT: v_lshl_b32_e32 v3, s4, v3
634 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
635 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v5
636 ; CGP-NEXT: v_ashrrev_i32_e32 v6, 31, v2
637 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
638 ; CGP-NEXT: v_ashrrev_i32_e32 v7, 31, v3
639 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
640 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
641 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v7
642 ; CGP-NEXT: v_xor_b32_e32 v2, v2, v6
643 ; CGP-NEXT: v_xor_b32_e32 v3, v3, v7
644 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v2
645 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
646 ; CGP-NEXT: v_cvt_f32_u32_e32 v8, v3
647 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
648 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
649 ; CGP-NEXT: v_rcp_f32_e32 v8, v8
650 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
651 ; CGP-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
652 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
653 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
654 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
655 ; CGP-NEXT: v_mul_lo_u32 v9, v9, v8
656 ; CGP-NEXT: v_mul_lo_u32 v10, 0, v7
657 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
658 ; CGP-NEXT: v_mul_lo_u32 v11, 0, v9
659 ; CGP-NEXT: v_mul_hi_u32 v9, v8, v9
660 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v10, v7
661 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
662 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v7
663 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v8, v9
664 ; CGP-NEXT: v_mul_lo_u32 v8, 0, v6
665 ; CGP-NEXT: v_mul_hi_u32 v6, v0, v6
666 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
667 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
668 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
669 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
670 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
671 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v3
672 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v6
673 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
674 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
675 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
676 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
677 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
678 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
679 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
680 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v0, v2
681 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
682 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
683 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
684 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
685 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
686 ; CGP-NEXT: v_xor_b32_e32 v0, v0, v4
687 ; CGP-NEXT: v_xor_b32_e32 v1, v1, v5
688 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
689 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
690 ; CGP-NEXT: s_setpc_b64 s[30:31]
691 %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
692 %r = srem <2 x i32> %x, %shl.y
; Signed 32-bit remainder of operands that are first masked to their low
; 24 bits (and with 16777215 = 0xffffff), so both are known non-negative.
; The GISEL expectations still contain the full sign-handling sequence
; (ashr/add/xor before, xor/sub after), whereas the CGP expectations omit it
; and only contain the unsigned reciprocal-based core.
696 define i32 @v_srem_i32_24bit(i32 %num, i32 %den) {
697 ; GISEL-LABEL: v_srem_i32_24bit:
699 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GISEL-NEXT: s_mov_b32 s4, 0xffffff
701 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
702 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
703 ; GISEL-NEXT: v_ashrrev_i32_e32 v2, 31, v0
704 ; GISEL-NEXT: v_ashrrev_i32_e32 v3, 31, v1
705 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
706 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
707 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
708 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v3
709 ; GISEL-NEXT: v_cvt_f32_u32_e32 v3, v1
710 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
711 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v3, v3
712 ; GISEL-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3
713 ; GISEL-NEXT: v_cvt_u32_f32_e32 v3, v3
714 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v3
715 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
716 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v4
717 ; GISEL-NEXT: v_mul_hi_u32 v3, v0, v3
718 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v1
719 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
720 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
721 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
722 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
723 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v0, v1
724 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
725 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
726 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v2
727 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
728 ; GISEL-NEXT: s_setpc_b64 s[30:31]
730 ; CGP-LABEL: v_srem_i32_24bit:
732 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
733 ; CGP-NEXT: s_mov_b32 s4, 0xffffff
734 ; CGP-NEXT: v_and_b32_e32 v0, s4, v0
735 ; CGP-NEXT: v_and_b32_e32 v1, s4, v1
736 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v1
737 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
738 ; CGP-NEXT: v_rcp_f32_e32 v2, v2
739 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
740 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
741 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
742 ; CGP-NEXT: v_mul_lo_u32 v4, 0, v3
743 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
744 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v4, v3
745 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
746 ; CGP-NEXT: v_mul_lo_u32 v3, 0, v2
747 ; CGP-NEXT: v_mul_hi_u32 v2, v0, v2
748 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
749 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
750 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
751 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
752 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
753 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
754 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
755 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
756 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
757 ; CGP-NEXT: s_setpc_b64 s[30:31]
758 %num.mask = and i32 %num, 16777215
759 %den.mask = and i32 %den, 16777215
760 %result = srem i32 %num.mask, %den.mask
; Two-lane vector signed remainder whose operands are both masked to their low
; 24 bits (and with 16777215, i.e. 0xffffff) before the srem, so every lane fed
; to the remainder is non-negative.
;
; The assertions below are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
;   * The checks under the GISEL prefix belong to the RUN line that passes
;     -amdgpu-codegenprepare-disable-idiv-expansion=1. They still contain the
;     sign handling around the unsigned core: v_ashrrev_i32 by 31, add, xor on
;     the way in, and xor, sub on the way out.
;   * The checks under the CGP prefix belong to the RUN line that passes
;     -amdgpu-codegenprepare-disable-idiv-expansion=0. They contain no sign
;     handling, use v_rcp_f32 where the other run uses v_rcp_iflag_f32, and
;     carry extra v_mul_lo_u32 by 0 / v_add_i32 pairs.
764 define <2 x i32> @v_srem_v2i32_24bit(<2 x i32> %num, <2 x i32> %den) {
765 ; GISEL-LABEL: v_srem_v2i32_24bit:
767 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
768 ; GISEL-NEXT: s_mov_b32 s4, 0xffffff
769 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
770 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v1
771 ; GISEL-NEXT: v_and_b32_e32 v2, s4, v2
772 ; GISEL-NEXT: v_and_b32_e32 v3, s4, v3
773 ; GISEL-NEXT: v_ashrrev_i32_e32 v4, 31, v0
774 ; GISEL-NEXT: v_ashrrev_i32_e32 v5, 31, v2
775 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v1
776 ; GISEL-NEXT: v_ashrrev_i32_e32 v7, 31, v3
777 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
778 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v5
779 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v6
780 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
781 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
782 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v5
783 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
784 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v7
785 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v2
786 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, 0, v2
787 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
788 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
789 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
790 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
791 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x4f7ffffe, v5
792 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x4f7ffffe, v8
793 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
794 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
795 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v5
796 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v8
797 ; GISEL-NEXT: v_mul_hi_u32 v7, v5, v7
798 ; GISEL-NEXT: v_mul_hi_u32 v9, v8, v9
799 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
800 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v8, v9
801 ; GISEL-NEXT: v_mul_hi_u32 v5, v0, v5
802 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
803 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v2
804 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, v3
805 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v5
806 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v7
807 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
808 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
809 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
810 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
811 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
812 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
813 ; GISEL-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
814 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v1, v3
815 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
816 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
817 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
818 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
819 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v4
820 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v6
821 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
822 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
823 ; GISEL-NEXT: s_setpc_b64 s[30:31]
825 ; CGP-LABEL: v_srem_v2i32_24bit:
827 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
828 ; CGP-NEXT: s_mov_b32 s4, 0xffffff
829 ; CGP-NEXT: v_and_b32_e32 v0, s4, v0
830 ; CGP-NEXT: v_and_b32_e32 v1, s4, v1
831 ; CGP-NEXT: v_and_b32_e32 v2, s4, v2
832 ; CGP-NEXT: v_and_b32_e32 v3, s4, v3
833 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
834 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, 0, v2
835 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v3
836 ; CGP-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
837 ; CGP-NEXT: v_rcp_f32_e32 v4, v4
838 ; CGP-NEXT: v_rcp_f32_e32 v6, v6
839 ; CGP-NEXT: v_mul_f32_e32 v4, 0x4f7ffffe, v4
840 ; CGP-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6
841 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
842 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
843 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v4
844 ; CGP-NEXT: v_mul_lo_u32 v7, v7, v6
845 ; CGP-NEXT: v_mul_lo_u32 v8, 0, v5
846 ; CGP-NEXT: v_mul_hi_u32 v5, v4, v5
847 ; CGP-NEXT: v_mul_lo_u32 v9, 0, v7
848 ; CGP-NEXT: v_mul_hi_u32 v7, v6, v7
849 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v8, v5
850 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v9, v7
851 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
852 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v6, v7
853 ; CGP-NEXT: v_mul_lo_u32 v6, 0, v4
854 ; CGP-NEXT: v_mul_hi_u32 v4, v0, v4
855 ; CGP-NEXT: v_mul_lo_u32 v7, 0, v5
856 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
857 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
858 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
859 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v2
860 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v3
861 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v4
862 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
863 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
864 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
865 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
866 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
867 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
868 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
869 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
870 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
871 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
872 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
873 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3
874 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
875 ; CGP-NEXT: s_setpc_b64 s[30:31]
876 %num.mask = and <2 x i32> %num, <i32 16777215, i32 16777215>
877 %den.mask = and <2 x i32> %den, <i32 16777215, i32 16777215>
878 %result = srem <2 x i32> %num.mask, %den.mask
879 ret <2 x i32> %result