1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; v_urem_i64: unsigned remainder of two i64 values (%num urem %den).
; The assertions below use the CHECK prefix that both RUN lines share, so the
; same assembly is expected whether
; -amdgpu-codegenprepare-disable-idiv-expansion is 1 or 0.
; Shape of the expected output: v5 (a copy of v1) and v3 are ORed together and
; the 64-bit pair {0, OR} is compared against zero; presumably these are the
; high dwords of %num and %den - TODO confirm against the calling convention.
; Lanes where the OR is nonzero execute %bb.1, the long multi-dword sequence;
; the remaining lanes execute %bb.3, which reads only v2 and v4 and writes 0
; to v1.
7 define i64 @v_urem_i64(i64 %num, i64 %den) {
8 ; CHECK-LABEL: v_urem_i64:
10 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; CHECK-NEXT: v_mov_b32_e32 v4, v0
12 ; CHECK-NEXT: v_mov_b32_e32 v5, v1
13 ; CHECK-NEXT: v_or_b32_e32 v1, v5, v3
14 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
15 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
17 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
18 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
19 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
20 ; CHECK-NEXT: ; %bb.1:
21 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
22 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v3
23 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
24 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
25 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
26 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
27 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
28 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
29 ; CHECK-NEXT: v_trunc_f32_e32 v1, v1
30 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
31 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
32 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
33 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v1
34 ; CHECK-NEXT: v_mul_lo_u32 v9, v6, v0
35 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
36 ; CHECK-NEXT: v_mul_hi_u32 v11, v6, v0
37 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
38 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v9
39 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
40 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v9
41 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
42 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
43 ; CHECK-NEXT: v_mul_lo_u32 v13, v1, v8
44 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
45 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
46 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
47 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
48 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
49 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
50 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
51 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
52 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
53 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
54 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
55 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
56 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
57 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
58 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
59 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
60 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
61 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
62 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
63 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
64 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v0
65 ; CHECK-NEXT: v_mul_lo_u32 v6, v6, v1
66 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v8
67 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
68 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
69 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
70 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
71 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6
72 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v6
73 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v6
74 ; CHECK-NEXT: v_mul_hi_u32 v6, v1, v6
75 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
76 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
77 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
78 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
79 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
80 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
81 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
82 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
83 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
84 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
85 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
86 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
87 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
88 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
89 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
90 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
91 ; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0
92 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
93 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
94 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1
95 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1
96 ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1
97 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
98 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
99 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
100 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
101 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
102 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
103 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
104 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
105 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
106 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
107 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
108 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
109 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
110 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
111 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0
112 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0
113 ; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0
114 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
115 ; CHECK-NEXT: v_mul_lo_u32 v1, v2, v1
116 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
117 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
118 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v7
119 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v0, vcc
120 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v0
121 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
122 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
123 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3
124 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
125 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
126 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
127 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
128 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v2
129 ; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
130 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2
131 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
132 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
133 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3
134 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
135 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v6, v2
136 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
137 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3
138 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
139 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
140 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
141 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v0, vcc
142 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
143 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
144 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
145 ; CHECK-NEXT: ; implicit-def: $vgpr2
146 ; CHECK-NEXT: ; implicit-def: $vgpr4
147 ; CHECK-NEXT: .LBB0_2: ; %Flow
148 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
149 ; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
150 ; CHECK-NEXT: s_cbranch_execz .LBB0_4
151 ; CHECK-NEXT: ; %bb.3:
152 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
153 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
154 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
155 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
156 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
157 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
158 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
159 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
160 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
161 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, v2
162 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
163 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
164 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
165 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
166 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
167 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
168 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
169 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
170 ; CHECK-NEXT: .LBB0_4:
171 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
172 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; IR under test: a single i64 urem. Everything above is autogenerated expected
; output and should be regenerated, not hand-edited.
173 %result = urem i64 %num, %den
177 ; FIXME: This is a workaround for not handling the uniform VGPR case.
178 declare i32 @llvm.amdgcn.readfirstlane(i32)
; s_urem_i64: the same i64 urem, as an amdgpu_ps function whose operands are
; marked inreg. The result is bitcast to <2 x i32>, each element is passed
; through llvm.amdgcn.readfirstlane, and the pair is packed back into an i64.
; Both RUN lines share the CHECK prefix for this function as well.
; Shape of the expected output: s[0:1] and s[2:3] are ORed, masked with
; s[4:5] = {0, -1} and compared against zero; the result feeds a scalar branch
; (s_cbranch_vccz) rather than an exec-mask update. %bb.1 is the long
; multi-dword sequence and %bb.4 is the short one that reads only s0 and s2.
180 define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
181 ; CHECK-LABEL: s_urem_i64:
183 ; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3]
184 ; CHECK-NEXT: s_mov_b32 s4, 0
185 ; CHECK-NEXT: s_mov_b32 s5, -1
186 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
187 ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
188 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2
189 ; CHECK-NEXT: ; %bb.1:
190 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
191 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
192 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3
193 ; CHECK-NEXT: s_sub_u32 s4, 0, s2
194 ; CHECK-NEXT: s_cselect_b32 s5, 1, 0
195 ; CHECK-NEXT: v_mov_b32_e32 v3, s1
196 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2
197 ; CHECK-NEXT: s_and_b32 s5, s5, 1
198 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
199 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
200 ; CHECK-NEXT: s_cmp_lg_u32 s5, 0
201 ; CHECK-NEXT: s_subb_u32 s5, 0, s3
202 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
203 ; CHECK-NEXT: v_trunc_f32_e32 v2, v2
204 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
205 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
206 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
207 ; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2
208 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0
209 ; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0
210 ; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0
211 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
212 ; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5
213 ; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5
214 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5
215 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
216 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4
217 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4
218 ; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4
219 ; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4
220 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
221 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
222 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5
223 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
224 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
225 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
226 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10
227 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
228 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
229 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8
230 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
231 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
232 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
233 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
234 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
235 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc
236 ; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0
237 ; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0
238 ; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0
239 ; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
240 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4
241 ; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4
242 ; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4
243 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
244 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
245 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5
246 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5
247 ; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
248 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5
249 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
250 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
251 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4
252 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
253 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
254 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
255 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10
256 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
257 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
258 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
259 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
260 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
261 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
262 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
263 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
264 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
265 ; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0
266 ; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0
267 ; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0
268 ; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2
269 ; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2
270 ; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2
271 ; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2
272 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
273 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
274 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
275 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
276 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
277 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
278 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
279 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
280 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
281 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
282 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
283 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
284 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
285 ; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0
286 ; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0
287 ; CHECK-NEXT: v_mul_hi_u32 v0, s2, v0
288 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
289 ; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2
290 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
291 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0
292 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, s0, v5
293 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v0, vcc
294 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], s1, v0
295 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v2
296 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
297 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3
298 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
299 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v1, vcc
300 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3
301 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc
302 ; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v2
303 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
304 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3
305 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
306 ; CHECK-NEXT: v_subrev_i32_e32 v5, vcc, s2, v3
307 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0
308 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
309 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0
310 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
311 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
312 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v5, vcc
313 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
314 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
315 ; CHECK-NEXT: s_mov_b32 s5, 0
316 ; CHECK-NEXT: s_branch .LBB1_3
317 ; CHECK-NEXT: .LBB1_2:
318 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
319 ; CHECK-NEXT: .LBB1_3: ; %Flow
320 ; CHECK-NEXT: s_xor_b32 s1, s5, -1
321 ; CHECK-NEXT: s_and_b32 s1, s1, 1
322 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
323 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
324 ; CHECK-NEXT: ; %bb.4:
325 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
326 ; CHECK-NEXT: s_sub_i32 s1, 0, s2
327 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
328 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
329 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
330 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
331 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
332 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
333 ; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
334 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2
335 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
336 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
337 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
338 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
339 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
340 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
341 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
342 ; CHECK-NEXT: .LBB1_5:
343 ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
344 ; CHECK-NEXT: s_mov_b32 s1, s0
345 ; CHECK-NEXT: ; return to shader part epilog
; IR under test: compute the remainder, then split it into two i32 elements so
; each can be passed through readfirstlane before being packed back together.
346 %result = urem i64 %num, %den
347 %cast = bitcast i64 %result to <2 x i32>
348 %elt.0 = extractelement <2 x i32> %cast, i32 0
349 %elt.1 = extractelement <2 x i32> %cast, i32 1
350 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
351 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
352 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): the next line inserts %res.0 into element 1, so %res.1 is
; computed but never used, and the expected output ends with
; "s_mov_b32 s1, s0" with no sign of the upper-dword computation. This looks
; like a typo for %res.1 - TODO confirm. Fixing it requires regenerating the
; assertions with utils/update_llc_test_checks.py.
353 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
354 %cast.back = bitcast <2 x i32> %ins.1 to i64
358 define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
359 ; GISEL-LABEL: v_urem_v2i64:
361 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4
363 ; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5
364 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9
365 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
366 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8
367 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8
368 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9
369 ; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9
370 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
371 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
372 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4
373 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc
374 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8
375 ; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8
376 ; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9
377 ; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8
378 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
379 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
380 ; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12
381 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13
382 ; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12
383 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
384 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
385 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
386 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
387 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
388 ; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13
389 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
390 ; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13
391 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
392 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
393 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
394 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
395 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
396 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
397 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
398 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
399 ; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
400 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
401 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
402 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
403 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8
404 ; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8
405 ; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9
406 ; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8
407 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
408 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
409 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12
410 ; GISEL-NEXT: v_mul_lo_u32 v13, v8, v10
411 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v12
412 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
413 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
414 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
415 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
416 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
417 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10
418 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
419 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v10
420 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
421 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
422 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
423 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
424 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
425 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
426 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
427 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
428 ; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10
429 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
430 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
431 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc
432 ; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8
433 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9
434 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8
435 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
436 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
437 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
438 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
439 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
440 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
441 ; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8
442 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9
443 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
444 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
445 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
446 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
447 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
448 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
449 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
450 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
451 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
452 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
453 ; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8
454 ; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8
455 ; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9
456 ; GISEL-NEXT: v_mul_hi_u32 v8, v4, v8
457 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
458 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
459 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
460 ; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v8, vcc
461 ; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v8
462 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v5
463 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
464 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
465 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
466 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v5
467 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5]
468 ; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v4
469 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
470 ; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
471 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
472 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
473 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v4
474 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
475 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5
476 ; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
477 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v10, v4
478 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v5, s[4:5]
479 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
480 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
481 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
482 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
483 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
484 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
485 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
486 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6
487 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7
488 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
489 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
490 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
491 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
492 ; GISEL-NEXT: v_trunc_f32_e32 v5, v5
493 ; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
494 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
495 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
496 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
497 ; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc
498 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
499 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4
500 ; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5
501 ; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4
502 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
503 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
504 ; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10
505 ; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11
506 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10
507 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
508 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
509 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
510 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
511 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
512 ; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11
513 ; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10
514 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11
515 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
516 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
517 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
518 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
519 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
520 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
521 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
522 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
523 ; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11
524 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
525 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
526 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v11, vcc
527 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
528 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4
529 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5
530 ; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4
531 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
532 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
533 ; GISEL-NEXT: v_mul_lo_u32 v9, v5, v10
534 ; GISEL-NEXT: v_mul_lo_u32 v11, v4, v8
535 ; GISEL-NEXT: v_mul_hi_u32 v12, v4, v10
536 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
537 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
538 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
539 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
540 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
541 ; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8
542 ; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10
543 ; GISEL-NEXT: v_mul_hi_u32 v12, v4, v8
544 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
545 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
546 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
547 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
548 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
549 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
550 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
551 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
552 ; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
553 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
554 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
555 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc
556 ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4
557 ; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5
558 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
559 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
560 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
561 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
562 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
563 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
564 ; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5
565 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
566 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5
567 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4
568 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
569 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
570 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
571 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
572 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
573 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
574 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
575 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
576 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
577 ; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4
578 ; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4
579 ; GISEL-NEXT: v_mul_lo_u32 v5, v6, v5
580 ; GISEL-NEXT: v_mul_hi_u32 v4, v6, v4
581 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v9, v5
582 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
583 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
584 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v3, v4, vcc
585 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v4
586 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v7
587 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
588 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
589 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
590 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v7
591 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v4, v8, s[4:5]
592 ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v6
593 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
594 ; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
595 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7
596 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
597 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v6
598 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
599 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7
600 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
601 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
602 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
603 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
604 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
605 ; GISEL-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
606 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
607 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
608 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
609 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
610 ; GISEL-NEXT: s_setpc_b64 s[30:31]
612 ; CGP-LABEL: v_urem_v2i64:
614 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615 ; CGP-NEXT: v_mov_b32_e32 v10, v0
616 ; CGP-NEXT: v_mov_b32_e32 v11, v1
617 ; CGP-NEXT: v_mov_b32_e32 v8, v2
618 ; CGP-NEXT: v_mov_b32_e32 v9, v3
619 ; CGP-NEXT: v_or_b32_e32 v1, v11, v5
620 ; CGP-NEXT: v_mov_b32_e32 v0, 0
621 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
622 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
623 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
624 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
625 ; CGP-NEXT: s_cbranch_execz .LBB2_2
627 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
628 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
629 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
630 ; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc
631 ; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
632 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
633 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
634 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
635 ; CGP-NEXT: v_trunc_f32_e32 v1, v1
636 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
637 ; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1
638 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
639 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v1
640 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v0
641 ; CGP-NEXT: v_mul_lo_u32 v14, v3, v0
642 ; CGP-NEXT: v_mul_hi_u32 v15, v2, v0
643 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
644 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v13
645 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
646 ; CGP-NEXT: v_mul_hi_u32 v13, v1, v13
647 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
648 ; CGP-NEXT: v_mul_lo_u32 v15, v0, v12
649 ; CGP-NEXT: v_mul_lo_u32 v17, v1, v12
650 ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12
651 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
652 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
653 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
654 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
655 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
656 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
657 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
658 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18
659 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
660 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
661 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
662 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
663 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
664 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
665 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
666 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
667 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v12, vcc
668 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v0
669 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v0
670 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v0
671 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
672 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v12
673 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
674 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
675 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
676 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13
677 ; CGP-NEXT: v_mul_lo_u32 v3, v0, v2
678 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v2
679 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v2
680 ; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
681 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
682 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
683 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
684 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
685 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15
686 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
687 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
688 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
689 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
690 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
691 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3
692 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
693 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
694 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
695 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3
696 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
697 ; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
698 ; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
699 ; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
700 ; CGP-NEXT: v_mul_lo_u32 v12, v10, v1
701 ; CGP-NEXT: v_mul_lo_u32 v13, v11, v1
702 ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1
703 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1
704 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
705 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
706 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0
707 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
708 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
709 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
710 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
711 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
712 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
713 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3
714 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
715 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
716 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
717 ; CGP-NEXT: v_mul_lo_u32 v3, v4, v0
718 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v0
719 ; CGP-NEXT: v_mul_hi_u32 v0, v4, v0
720 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2
721 ; CGP-NEXT: v_mul_lo_u32 v1, v4, v1
722 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
723 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
724 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v3
725 ; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v11, v0, vcc
726 ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v0
727 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4
728 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
729 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
730 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
731 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
732 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5
733 ; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
734 ; CGP-NEXT: v_sub_i32_e32 v10, vcc, v1, v4
735 ; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v0, vcc
736 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
737 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
738 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
739 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
740 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
741 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v10, v4
742 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
743 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5
744 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v12, vcc
745 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
746 ; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
747 ; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v0, vcc
748 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
749 ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
750 ; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
751 ; CGP-NEXT: ; implicit-def: $vgpr4
752 ; CGP-NEXT: ; implicit-def: $vgpr10
753 ; CGP-NEXT: .LBB2_2: ; %Flow2
754 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
755 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
756 ; CGP-NEXT: s_cbranch_execz .LBB2_4
758 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
759 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
760 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
761 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
762 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
763 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
764 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
765 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
766 ; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
767 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
768 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
769 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
770 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
771 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
772 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
773 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
774 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
775 ; CGP-NEXT: v_mov_b32_e32 v1, 0
777 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
778 ; CGP-NEXT: v_or_b32_e32 v3, v9, v7
779 ; CGP-NEXT: v_mov_b32_e32 v2, 0
780 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
781 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
782 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
783 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
784 ; CGP-NEXT: s_cbranch_execz .LBB2_6
786 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
787 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v7
788 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
789 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc
790 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
791 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
792 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
793 ; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
794 ; CGP-NEXT: v_trunc_f32_e32 v3, v3
795 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
796 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
797 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
798 ; CGP-NEXT: v_mul_lo_u32 v10, v4, v3
799 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v2
800 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v2
801 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v2
802 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
803 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v11
804 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
805 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v11
806 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
807 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v10
808 ; CGP-NEXT: v_mul_lo_u32 v15, v3, v10
809 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10
810 ; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
811 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
812 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
813 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
814 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
815 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
816 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
817 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
818 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
819 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
820 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
821 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
822 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
823 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
824 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
825 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
826 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
827 ; CGP-NEXT: v_mul_lo_u32 v10, v4, v2
828 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
829 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v2
830 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
831 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v10
832 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v10
833 ; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
834 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
835 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
836 ; CGP-NEXT: v_mul_lo_u32 v5, v2, v4
837 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v4
838 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v4
839 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
840 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
841 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
842 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
843 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
844 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
845 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
846 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
847 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
848 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
849 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
850 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
851 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
852 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
853 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
854 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
855 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc
856 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v2
857 ; CGP-NEXT: v_mul_hi_u32 v5, v8, v2
858 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
859 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v3
860 ; CGP-NEXT: v_mul_lo_u32 v11, v9, v3
861 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3
862 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3
863 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
864 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
865 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
866 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
867 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
868 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
869 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
870 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
871 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
872 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
873 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
874 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
875 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
876 ; CGP-NEXT: v_mul_lo_u32 v5, v6, v2
877 ; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
878 ; CGP-NEXT: v_mul_hi_u32 v2, v6, v2
879 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
880 ; CGP-NEXT: v_mul_lo_u32 v3, v6, v3
881 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3
882 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
883 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5
884 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v2, vcc
885 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v9, v2
886 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6
887 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
888 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7
889 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
890 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc
891 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7
892 ; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
893 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v3, v6
894 ; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc
895 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6
896 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
897 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc
898 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7
899 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
900 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
901 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
902 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7
903 ; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v10, vcc
904 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
905 ; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
906 ; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v2, vcc
907 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
908 ; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc
909 ; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
910 ; CGP-NEXT: ; implicit-def: $vgpr6
911 ; CGP-NEXT: ; implicit-def: $vgpr8
912 ; CGP-NEXT: .LBB2_6: ; %Flow
913 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
914 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
915 ; CGP-NEXT: s_cbranch_execz .LBB2_8
917 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
918 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
919 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
920 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
921 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
922 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
923 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
924 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
925 ; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
926 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v6
927 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
928 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
929 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
930 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
931 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
932 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
933 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
934 ; CGP-NEXT: v_mov_b32_e32 v3, 0
936 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
937 ; CGP-NEXT: s_setpc_b64 s[30:31]
938 %result = urem <2 x i64> %num, %den
939 ret <2 x i64> %result
; Unsigned remainder of an i64 by the constant 4096 (0x1000, a power of two).
; The assertions below use the common check prefix shared by both RUN lines,
; so the same output is expected whether the idiv expansion in
; AMDGPUCodeGenPrepare is enabled or disabled: v0 is ANDed with 0xfff
; (4096 - 1) and v1 is set to 0.
; NOTE(review): v0/v1 are presumably the low/high dwords of the i64 value;
; confirm against the calling convention.
942 define i64 @v_urem_i64_pow2k_denom(i64 %num) {
943 ; CHECK-LABEL: v_urem_i64_pow2k_denom:
945 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
946 ; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
947 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
948 ; CHECK-NEXT: s_setpc_b64 s[30:31]
949 %result = urem i64 %num, 4096
; Vector form of the power-of-two case: urem of a <2 x i64> by a splat of
; 4096. Under the common check prefix (both RUN lines) the expected output
; materializes 0xfff once in s4, ANDs v0 and v2 with it, and sets v1 and v3
; to 0.
; NOTE(review): v0/v2 and v1/v3 are presumably the low and high dwords of the
; two vector elements; confirm against the calling convention.
953 define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
954 ; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
956 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
957 ; CHECK-NEXT: s_movk_i32 s4, 0xfff
958 ; CHECK-NEXT: v_and_b32_e32 v0, s4, v0
959 ; CHECK-NEXT: v_and_b32_e32 v2, s4, v2
960 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
961 ; CHECK-NEXT: v_mov_b32_e32 v3, 0
962 ; CHECK-NEXT: s_setpc_b64 s[30:31]
963 %result = urem <2 x i64> %num, <i64 4096, i64 4096>
964 ret <2 x i64> %result
; Unsigned remainder of an i64 by the odd constant 1235195 (0x12d8fb, not a
; power of two). The assertions use the common check prefix, so both RUN
; lines expect the same long expansion. Reading the expected output top to
; bottom:
;   * the constant in s4 is converted to float, v_rcp_iflag_f32 takes its
;     reciprocal, and the scaled result is split with v_trunc_f32 and
;     converted back into two 32-bit integers (v3 and v4);
;   * two rounds of v_mul_lo_u32 / v_mul_hi_u32 products against s5 and -1
;     update v3/v4, with carries captured through v_cndmask_b32 0/1 selects
;     (s5 = 0xffed2705 is 0x12d8fb negated modulo 2^32);
;   * v3/v4 are multiplied with v0/v1 (presumably the numerator halves), the
;     product is multiplied by the constant in s4, and that is subtracted
;     from v0/v1;
;   * the trailing v_cmp / v_cndmask_b32 sequence selects between that value
;     and copies with the constant (held in v2) subtracted once or twice.
967 define i64 @v_urem_i64_oddk_denom(i64 %num) {
968 ; CHECK-LABEL: v_urem_i64_oddk_denom:
970 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971 ; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
972 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
973 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v3, 0
974 ; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
975 ; CHECK-NEXT: s_bfe_i32 s6, -1, 0x10000
976 ; CHECK-NEXT: s_bfe_i32 s7, -1, 0x10000
977 ; CHECK-NEXT: v_cvt_f32_u32_e32 v4, s4
978 ; CHECK-NEXT: v_mov_b32_e32 v5, s6
979 ; CHECK-NEXT: v_mov_b32_e32 v6, s7
980 ; CHECK-NEXT: v_mac_f32_e32 v4, 0x4f800000, v3
981 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v4
982 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
983 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
984 ; CHECK-NEXT: v_trunc_f32_e32 v4, v4
985 ; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
986 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
987 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
988 ; CHECK-NEXT: v_mul_lo_u32 v7, s5, v4
989 ; CHECK-NEXT: v_mul_lo_u32 v8, s5, v3
990 ; CHECK-NEXT: v_mul_lo_u32 v9, -1, v3
991 ; CHECK-NEXT: v_mul_hi_u32 v10, s5, v3
992 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
993 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v8
994 ; CHECK-NEXT: v_mul_hi_u32 v11, v3, v8
995 ; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8
996 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10
997 ; CHECK-NEXT: v_mul_lo_u32 v10, v3, v7
998 ; CHECK-NEXT: v_mul_lo_u32 v12, v4, v7
999 ; CHECK-NEXT: v_mul_hi_u32 v13, v3, v7
1000 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7
1001 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1002 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1003 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v12, v8
1004 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1005 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1006 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1007 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v13
1008 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1009 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
1010 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v12, v11
1011 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1012 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1013 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
1014 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1015 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v8
1016 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v7, vcc
1017 ; CHECK-NEXT: v_mul_lo_u32 v7, s5, v3
1018 ; CHECK-NEXT: v_mul_lo_u32 v8, -1, v3
1019 ; CHECK-NEXT: v_mul_hi_u32 v9, s5, v3
1020 ; CHECK-NEXT: v_mul_lo_u32 v10, s5, v4
1021 ; CHECK-NEXT: v_mul_lo_u32 v11, v4, v7
1022 ; CHECK-NEXT: v_mul_hi_u32 v12, v3, v7
1023 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v7
1024 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1025 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1026 ; CHECK-NEXT: v_mul_lo_u32 v9, v3, v8
1027 ; CHECK-NEXT: v_mul_lo_u32 v10, v4, v8
1028 ; CHECK-NEXT: v_mul_hi_u32 v13, v3, v8
1029 ; CHECK-NEXT: v_mul_hi_u32 v8, v4, v8
1030 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1031 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1032 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1033 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1034 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1035 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1036 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v13
1037 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1038 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1039 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1040 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1041 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1042 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v10, v9
1043 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1044 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1045 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
1046 ; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3
1047 ; CHECK-NEXT: v_mul_hi_u32 v8, v0, v3
1048 ; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
1049 ; CHECK-NEXT: v_mul_lo_u32 v9, v0, v4
1050 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v4
1051 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v4
1052 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
1053 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1054 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1055 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v10, v3
1056 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1057 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
1058 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1059 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v11
1060 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1061 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1062 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1063 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v7
1064 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1065 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1066 ; CHECK-NEXT: v_mul_lo_u32 v8, s4, v3
1067 ; CHECK-NEXT: v_mul_lo_u32 v9, 0, v3
1068 ; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3
1069 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
1070 ; CHECK-NEXT: v_mul_lo_u32 v4, s4, v4
1071 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v9, v4
1072 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1073 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
1074 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v1, v3, vcc
1075 ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v3
1076 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v2
1077 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
1078 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4
1079 ; CHECK-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5]
1080 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1081 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
1082 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1083 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v2
1084 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
1085 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1086 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v6, v7, vcc
1087 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
1088 ; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
1089 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1090 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc
1091 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1092 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1093 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1094 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
1095 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1096 %result = urem i64 %num, 1235195
1100 define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
1101 ; GISEL-LABEL: v_urem_v2i64_oddk_denom:
1103 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1104 ; GISEL-NEXT: s_mov_b32 s8, 0x12d8fb
1105 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, s8
1106 ; GISEL-NEXT: s_sub_u32 s6, 0, s8
1107 ; GISEL-NEXT: s_cselect_b32 s4, 1, 0
1108 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
1109 ; GISEL-NEXT: v_mov_b32_e32 v6, v4
1110 ; GISEL-NEXT: s_and_b32 s4, s4, 1
1111 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
1112 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v5
1113 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
1114 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v6
1115 ; GISEL-NEXT: s_cmp_lg_u32 s4, 0
1116 ; GISEL-NEXT: s_subb_u32 s7, 0, 0
1117 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
1118 ; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000
1119 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v4
1120 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v5
1121 ; GISEL-NEXT: v_mov_b32_e32 v5, s4
1122 ; GISEL-NEXT: v_mov_b32_e32 v4, s5
1123 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
1124 ; GISEL-NEXT: s_sub_u32 s9, 0, s8
1125 ; GISEL-NEXT: s_cselect_b32 s4, 1, 0
1126 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v7
1127 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
1128 ; GISEL-NEXT: s_and_b32 s4, s4, 1
1129 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9
1130 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
1131 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
1132 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v9
1133 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
1134 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1135 ; GISEL-NEXT: s_cmp_lg_u32 s4, 0
1136 ; GISEL-NEXT: s_subb_u32 s10, 0, 0
1137 ; GISEL-NEXT: v_mul_lo_u32 v10, s9, v8
1138 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
1139 ; GISEL-NEXT: s_bfe_i32 s11, -1, 0x10000
1140 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1141 ; GISEL-NEXT: v_mul_lo_u32 v11, s6, v9
1142 ; GISEL-NEXT: v_mul_lo_u32 v12, s9, v6
1143 ; GISEL-NEXT: v_mul_lo_u32 v13, s10, v6
1144 ; GISEL-NEXT: v_mul_hi_u32 v14, s9, v6
1145 ; GISEL-NEXT: v_mov_b32_e32 v15, s4
1146 ; GISEL-NEXT: v_mul_lo_u32 v16, s6, v7
1147 ; GISEL-NEXT: v_mul_lo_u32 v17, s7, v7
1148 ; GISEL-NEXT: v_mul_hi_u32 v18, s6, v7
1149 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
1150 ; GISEL-NEXT: v_mul_lo_u32 v13, v8, v12
1151 ; GISEL-NEXT: v_mul_hi_u32 v19, v6, v12
1152 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
1153 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v17, v11
1154 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v16
1155 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1156 ; GISEL-NEXT: v_mul_hi_u32 v14, v7, v16
1157 ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v16
1158 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
1159 ; GISEL-NEXT: v_mul_lo_u32 v18, v7, v11
1160 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
1161 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1162 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
1163 ; GISEL-NEXT: v_mul_lo_u32 v14, v6, v10
1164 ; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10
1165 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14
1166 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1167 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19
1168 ; GISEL-NEXT: v_mul_hi_u32 v13, v6, v10
1169 ; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
1170 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1171 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
1172 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
1173 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v17, v12
1174 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
1175 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v13
1176 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1177 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13
1178 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1179 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17
1180 ; GISEL-NEXT: v_mul_hi_u32 v18, v7, v11
1181 ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11
1182 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v16
1183 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1184 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
1185 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1186 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
1187 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1188 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1189 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v17
1190 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1191 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1192 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17
1193 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
1194 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
1195 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1196 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
1197 ; GISEL-NEXT: v_mul_lo_u32 v10, s9, v6
1198 ; GISEL-NEXT: v_mul_lo_u32 v12, s10, v6
1199 ; GISEL-NEXT: v_mul_hi_u32 v13, s9, v6
1200 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v16
1201 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v11, vcc
1202 ; GISEL-NEXT: v_mul_lo_u32 v11, s6, v7
1203 ; GISEL-NEXT: v_mul_lo_u32 v14, s7, v7
1204 ; GISEL-NEXT: v_mul_hi_u32 v16, s6, v7
1205 ; GISEL-NEXT: v_mul_lo_u32 v17, s9, v8
1206 ; GISEL-NEXT: v_mul_lo_u32 v18, v8, v10
1207 ; GISEL-NEXT: v_mul_hi_u32 v19, v6, v10
1208 ; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
1209 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1210 ; GISEL-NEXT: v_mul_lo_u32 v17, s6, v9
1211 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
1212 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v11
1213 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1214 ; GISEL-NEXT: v_mul_hi_u32 v13, v7, v11
1215 ; GISEL-NEXT: v_mul_hi_u32 v11, v9, v11
1216 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1217 ; GISEL-NEXT: v_mul_lo_u32 v16, v7, v14
1218 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1219 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1220 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
1221 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v12
1222 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
1223 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v18, v13
1224 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1225 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v13, v19
1226 ; GISEL-NEXT: v_mul_hi_u32 v13, v6, v12
1227 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1228 ; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19
1229 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v14
1230 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v16, v10
1231 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1232 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v13
1233 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1234 ; GISEL-NEXT: v_add_i32_e64 v13, s[4:5], v16, v13
1235 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1236 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1237 ; GISEL-NEXT: v_mul_hi_u32 v17, v7, v14
1238 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v19, v11
1239 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1240 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v17
1241 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1242 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
1243 ; GISEL-NEXT: v_mov_b32_e32 v19, s11
1244 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
1245 ; GISEL-NEXT: v_mul_hi_u32 v14, v9, v14
1246 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v18
1247 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1248 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v16
1249 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1250 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
1251 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1252 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1253 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v16
1254 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1255 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
1256 ; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6
1257 ; GISEL-NEXT: v_mul_hi_u32 v12, v2, v6
1258 ; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
1259 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
1260 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
1261 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
1262 ; GISEL-NEXT: v_mul_hi_u32 v13, v0, v7
1263 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
1264 ; GISEL-NEXT: v_mul_lo_u32 v14, v2, v8
1265 ; GISEL-NEXT: v_mul_lo_u32 v16, v3, v8
1266 ; GISEL-NEXT: v_mul_hi_u32 v17, v2, v8
1267 ; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8
1268 ; GISEL-NEXT: v_mul_lo_u32 v18, v0, v9
1269 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v18
1270 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1271 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1272 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
1273 ; GISEL-NEXT: v_mul_hi_u32 v13, v0, v9
1274 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
1275 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14
1276 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1277 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v16, v6
1278 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1279 ; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v11, v7
1280 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, s[4:5]
1281 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v12
1282 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
1283 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v17
1284 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1285 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1286 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
1287 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1288 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
1289 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
1290 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v17
1291 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1292 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1293 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1294 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v14
1295 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1296 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
1297 ; GISEL-NEXT: v_mul_lo_u32 v12, s8, v6
1298 ; GISEL-NEXT: v_mul_lo_u32 v14, 0, v6
1299 ; GISEL-NEXT: v_mul_hi_u32 v6, s8, v6
1300 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1301 ; GISEL-NEXT: v_mul_lo_u32 v13, s8, v7
1302 ; GISEL-NEXT: v_mul_lo_u32 v16, 0, v7
1303 ; GISEL-NEXT: v_mul_hi_u32 v7, s8, v7
1304 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1305 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1306 ; GISEL-NEXT: v_mul_lo_u32 v8, s8, v8
1307 ; GISEL-NEXT: v_mul_lo_u32 v9, s8, v9
1308 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v14, v8
1309 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
1310 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1311 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1312 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v12
1313 ; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v6, vcc
1314 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6
1315 ; GISEL-NEXT: v_cmp_le_u32_e64 s[4:5], s8, v2
1316 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1317 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v13
1318 ; GISEL-NEXT: v_subb_u32_e64 v9, s[6:7], v1, v7, s[4:5]
1319 ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v7
1320 ; GISEL-NEXT: v_cmp_le_u32_e64 s[6:7], s8, v0
1321 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7]
1322 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v8
1323 ; GISEL-NEXT: v_cndmask_b32_e64 v6, v15, v6, s[6:7]
1324 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1325 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
1326 ; GISEL-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc
1327 ; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1328 ; GISEL-NEXT: v_subrev_i32_e32 v7, vcc, s8, v2
1329 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1330 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v7
1331 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
1332 ; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, s8, v0
1333 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1334 ; GISEL-NEXT: v_cmp_le_u32_e32 vcc, s8, v11
1335 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
1336 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1337 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v19, v10, vcc
1338 ; GISEL-NEXT: v_subrev_i32_e32 v13, vcc, s8, v7
1339 ; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v3, vcc
1340 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1341 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v12, vcc
1342 ; GISEL-NEXT: v_subrev_i32_e32 v12, vcc, s8, v11
1343 ; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v1, vcc
1344 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
1345 ; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v13, vcc
1346 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
1347 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v12, s[4:5]
1348 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v14, vcc
1349 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1350 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v7, vcc
1351 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v15, s[4:5]
1352 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
1353 ; GISEL-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5]
1354 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5]
1355 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v8, v3, vcc
1356 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1358 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
1360 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1361 ; CGP-NEXT: s_mov_b32 s8, 0x12d8fb
1362 ; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
1363 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v5, 0
1364 ; CGP-NEXT: s_mov_b32 s6, 0xffed2705
1365 ; CGP-NEXT: s_bfe_i32 s4, -1, 0x10000
1366 ; CGP-NEXT: s_bfe_i32 s5, -1, 0x10000
1367 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
1368 ; CGP-NEXT: s_bfe_i32 s7, -1, 0x10000
1369 ; CGP-NEXT: s_bfe_i32 s9, -1, 0x10000
1370 ; CGP-NEXT: v_cvt_f32_u32_e32 v7, s8
1371 ; CGP-NEXT: v_mov_b32_e32 v8, s4
1372 ; CGP-NEXT: v_mov_b32_e32 v9, s5
1373 ; CGP-NEXT: v_cvt_f32_u32_e32 v10, v4
1374 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v5
1375 ; CGP-NEXT: v_mac_f32_e32 v10, 0x4f800000, v6
1376 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v7
1377 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v10
1378 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
1379 ; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1380 ; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
1381 ; CGP-NEXT: v_mul_f32_e32 v10, 0x2f800000, v6
1382 ; CGP-NEXT: v_trunc_f32_e32 v7, v7
1383 ; CGP-NEXT: v_trunc_f32_e32 v10, v10
1384 ; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
1385 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
1386 ; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v10
1387 ; CGP-NEXT: v_cvt_u32_f32_e32 v10, v10
1388 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
1389 ; CGP-NEXT: v_mul_lo_u32 v11, s6, v7
1390 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
1391 ; CGP-NEXT: v_mul_lo_u32 v12, s6, v10
1392 ; CGP-NEXT: v_mul_lo_u32 v13, s6, v5
1393 ; CGP-NEXT: v_mul_lo_u32 v14, -1, v5
1394 ; CGP-NEXT: v_mul_hi_u32 v15, s6, v5
1395 ; CGP-NEXT: v_mul_lo_u32 v16, s6, v6
1396 ; CGP-NEXT: v_mul_lo_u32 v17, -1, v6
1397 ; CGP-NEXT: v_mul_hi_u32 v18, s6, v6
1398 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
1399 ; CGP-NEXT: v_mul_lo_u32 v14, v7, v13
1400 ; CGP-NEXT: v_mul_hi_u32 v19, v5, v13
1401 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
1402 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
1403 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v16
1404 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
1405 ; CGP-NEXT: v_mul_hi_u32 v15, v6, v16
1406 ; CGP-NEXT: v_mul_hi_u32 v16, v10, v16
1407 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v18
1408 ; CGP-NEXT: v_mul_lo_u32 v18, v6, v12
1409 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18
1410 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1411 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v15
1412 ; CGP-NEXT: v_mul_lo_u32 v15, v5, v11
1413 ; CGP-NEXT: v_mul_lo_u32 v17, v7, v11
1414 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v15
1415 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1416 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
1417 ; CGP-NEXT: v_mul_hi_u32 v14, v5, v11
1418 ; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
1419 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1420 ; CGP-NEXT: v_add_i32_e64 v15, s[4:5], v15, v19
1421 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v12
1422 ; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v17, v13
1423 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[4:5]
1424 ; CGP-NEXT: v_add_i32_e64 v13, s[4:5], v13, v14
1425 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1426 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v17, v14
1427 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1428 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v18, v17
1429 ; CGP-NEXT: v_mul_hi_u32 v18, v6, v12
1430 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12
1431 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v19, v16
1432 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1433 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v18
1434 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1435 ; CGP-NEXT: v_add_i32_e32 v18, vcc, v19, v18
1436 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1437 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1438 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v16, v17
1439 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1440 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1441 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v17
1442 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14
1443 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
1444 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
1445 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
1446 ; CGP-NEXT: v_mul_lo_u32 v11, s6, v5
1447 ; CGP-NEXT: v_mul_lo_u32 v13, -1, v5
1448 ; CGP-NEXT: v_mul_hi_u32 v14, s6, v5
1449 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v16
1450 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc
1451 ; CGP-NEXT: v_mul_lo_u32 v12, s6, v6
1452 ; CGP-NEXT: v_mul_lo_u32 v15, -1, v6
1453 ; CGP-NEXT: v_mul_hi_u32 v16, s6, v6
1454 ; CGP-NEXT: v_mul_lo_u32 v17, s6, v7
1455 ; CGP-NEXT: v_mul_lo_u32 v18, v7, v11
1456 ; CGP-NEXT: v_mul_hi_u32 v19, v5, v11
1457 ; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
1458 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
1459 ; CGP-NEXT: v_mul_lo_u32 v17, s6, v10
1460 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
1461 ; CGP-NEXT: v_mul_lo_u32 v17, v10, v12
1462 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1463 ; CGP-NEXT: v_mul_hi_u32 v14, v6, v12
1464 ; CGP-NEXT: v_mul_hi_u32 v12, v10, v12
1465 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1466 ; CGP-NEXT: v_mul_lo_u32 v16, v6, v15
1467 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1468 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1469 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
1470 ; CGP-NEXT: v_mul_lo_u32 v14, v5, v13
1471 ; CGP-NEXT: v_mul_lo_u32 v16, v7, v13
1472 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v18, v14
1473 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1474 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v14, v19
1475 ; CGP-NEXT: v_mul_hi_u32 v14, v5, v13
1476 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1477 ; CGP-NEXT: v_add_i32_e64 v18, s[4:5], v18, v19
1478 ; CGP-NEXT: v_mul_lo_u32 v19, v10, v15
1479 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v16, v11
1480 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1481 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14
1482 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1483 ; CGP-NEXT: v_add_i32_e64 v14, s[4:5], v16, v14
1484 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1485 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1486 ; CGP-NEXT: v_mul_hi_u32 v17, v6, v15
1487 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v19, v12
1488 ; CGP-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1489 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1490 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1491 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v19, v17
1492 ; CGP-NEXT: v_mov_b32_e32 v19, s7
1493 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v18
1494 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1495 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v18
1496 ; CGP-NEXT: v_mov_b32_e32 v18, s9
1497 ; CGP-NEXT: v_mul_hi_u32 v13, v7, v13
1498 ; CGP-NEXT: v_mul_hi_u32 v15, v10, v15
1499 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1500 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1501 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v17, v16
1502 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1503 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v16
1504 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
1505 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v13, vcc
1506 ; CGP-NEXT: v_mul_lo_u32 v11, v1, v5
1507 ; CGP-NEXT: v_mul_hi_u32 v13, v0, v5
1508 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
1509 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1510 ; CGP-NEXT: v_addc_u32_e32 v10, vcc, v10, v14, vcc
1511 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v6
1512 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v6
1513 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
1514 ; CGP-NEXT: v_mul_lo_u32 v15, v0, v7
1515 ; CGP-NEXT: v_mul_lo_u32 v16, v1, v7
1516 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v7
1517 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
1518 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
1519 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1520 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1521 ; CGP-NEXT: v_mul_lo_u32 v11, v2, v10
1522 ; CGP-NEXT: v_mul_lo_u32 v13, v3, v10
1523 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v12, v11
1524 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
1525 ; CGP-NEXT: v_add_i32_e64 v11, s[4:5], v11, v14
1526 ; CGP-NEXT: v_mul_hi_u32 v11, v2, v10
1527 ; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
1528 ; CGP-NEXT: v_add_i32_e64 v5, s[6:7], v16, v5
1529 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[6:7]
1530 ; CGP-NEXT: v_add_i32_e64 v6, s[6:7], v13, v6
1531 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[6:7]
1532 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1533 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v17
1534 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1535 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1536 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1537 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v11
1538 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1539 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v17
1540 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1541 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
1542 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15
1543 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1544 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1545 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1546 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1547 ; CGP-NEXT: v_mul_lo_u32 v14, s8, v5
1548 ; CGP-NEXT: v_mul_lo_u32 v15, 0, v5
1549 ; CGP-NEXT: v_mul_hi_u32 v5, s8, v5
1550 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1551 ; CGP-NEXT: v_mul_lo_u32 v12, s8, v6
1552 ; CGP-NEXT: v_mul_lo_u32 v16, 0, v6
1553 ; CGP-NEXT: v_mul_hi_u32 v6, s8, v6
1554 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v13
1555 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1556 ; CGP-NEXT: v_mul_lo_u32 v7, s8, v7
1557 ; CGP-NEXT: v_mul_lo_u32 v10, s8, v10
1558 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v15, v7
1559 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10
1560 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
1561 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v10, v6
1562 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v14
1563 ; CGP-NEXT: v_subb_u32_e64 v7, s[4:5], v1, v5, vcc
1564 ; CGP-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v5
1565 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
1566 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
1567 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v12
1568 ; CGP-NEXT: v_subb_u32_e64 v10, s[6:7], v3, v6, s[4:5]
1569 ; CGP-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v6
1570 ; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v4
1571 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7]
1572 ; CGP-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v7
1573 ; CGP-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7]
1574 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1575 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
1576 ; CGP-NEXT: v_cndmask_b32_e32 v6, v19, v6, vcc
1577 ; CGP-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
1578 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v0, v4
1579 ; CGP-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1580 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
1581 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
1582 ; CGP-NEXT: v_sub_i32_e32 v12, vcc, v2, v4
1583 ; CGP-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
1584 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v4
1585 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
1586 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
1587 ; CGP-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
1588 ; CGP-NEXT: v_sub_i32_e32 v11, vcc, v8, v4
1589 ; CGP-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v1, vcc
1590 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
1591 ; CGP-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
1592 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v12, v4
1593 ; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
1594 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
1595 ; CGP-NEXT: v_cndmask_b32_e32 v8, v8, v11, vcc
1596 ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
1597 ; CGP-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5]
1598 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
1599 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
1600 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc
1601 ; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
1602 ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
1603 ; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
1604 ; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
1605 ; CGP-NEXT: v_cndmask_b32_e64 v3, v10, v3, s[4:5]
1606 ; CGP-NEXT: s_setpc_b64 s[30:31]
1607 %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
1608 ret <2 x i64> %result
; Unsigned remainder of an i64 by the variable value (4096 << %y).
; The assertions use the shared CHECK prefix, which both RUN lines of this file
; enable (alongside their GISEL / CGP specific prefixes).
; Shape of the expected code, as visible in the assertions below:
;   - s_mov_b64 + v_lshl_b64 build the 64-bit denominator from 0x1000 and v2.
;   - The high dwords of numerator and denominator are ORed together and compared
;     against zero; lanes where that value is non-zero execute %bb.1 (the full
;     64-bit expansion), the remaining lanes execute %bb.3 (a 32-bit expansion on
;     the low dwords that writes 0 to the high result dword).
;   - No AND-with-mask appears in the expected output, i.e. the remainder is not
;     reduced to a bit mask here even though the denominator is a shifted power of two.
; NOTE: the assertions are autogenerated (see the note on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than editing
; them by hand.
1611 define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
1612 ; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
1614 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1615 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
1616 ; CHECK-NEXT: v_mov_b32_e32 v4, v1
1617 ; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000
1618 ; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2
1619 ; CHECK-NEXT: v_or_b32_e32 v1, v4, v6
1620 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
1621 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1622 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
1623 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
1624 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1625 ; CHECK-NEXT: s_cbranch_execz .LBB7_2
1626 ; CHECK-NEXT: ; %bb.1:
1627 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
1628 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6
1629 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v5
1630 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v6, vcc
1631 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
1632 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
1633 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1634 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
1635 ; CHECK-NEXT: v_trunc_f32_e32 v1, v1
1636 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
1637 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
1638 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1639 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v1
1640 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v0
1641 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
1642 ; CHECK-NEXT: v_mul_hi_u32 v11, v2, v0
1643 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1644 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v9
1645 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
1646 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v9
1647 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
1648 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
1649 ; CHECK-NEXT: v_mul_lo_u32 v13, v1, v8
1650 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
1651 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
1652 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1653 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1654 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1655 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1656 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1657 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1658 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
1659 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1660 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1661 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
1662 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1663 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1664 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1665 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1666 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
1667 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
1668 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v0
1669 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
1670 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0
1671 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
1672 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v8
1673 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
1674 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
1675 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1676 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9
1677 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v2
1678 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v2
1679 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v2
1680 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
1681 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1682 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1683 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1684 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1685 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
1686 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1687 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1688 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1689 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1690 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1691 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1692 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1693 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1694 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1695 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1696 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
1697 ; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
1698 ; CHECK-NEXT: v_mul_hi_u32 v7, v3, v0
1699 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
1700 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v1
1701 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1
1702 ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1
1703 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
1704 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1705 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1706 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
1707 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1708 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1709 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1710 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
1711 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1712 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1713 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1714 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1715 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1716 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1717 ; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0
1718 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
1719 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
1720 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1721 ; CHECK-NEXT: v_mul_lo_u32 v1, v5, v1
1722 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1723 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1724 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v7
1725 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v0, vcc
1726 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0
1727 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5
1728 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
1729 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
1730 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1731 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc
1732 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6
1733 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1734 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
1735 ; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
1736 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5
1737 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1738 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc
1739 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v6
1740 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
1741 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
1742 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1743 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6
1744 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v8, vcc
1745 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1746 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1747 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
1748 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1749 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
1750 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
1751 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
1752 ; CHECK-NEXT: ; implicit-def: $vgpr3
1753 ; CHECK-NEXT: .LBB7_2: ; %Flow
1754 ; CHECK-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
1755 ; CHECK-NEXT: s_xor_b64 exec, exec, s[4:5]
1756 ; CHECK-NEXT: s_cbranch_execz .LBB7_4
1757 ; CHECK-NEXT: ; %bb.3:
1758 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
1759 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1760 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
1761 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1762 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1763 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
1764 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
1765 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1766 ; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1767 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, v5
1768 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v3, v0
1769 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v5
1770 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
1771 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1772 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v5
1773 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
1774 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1775 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1776 ; CHECK-NEXT: .LBB7_4:
1777 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
1778 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1779 %shl.y = shl i64 4096, %y
1780 %r = urem i64 %x, %shl.y
1784 define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1785 ; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
1787 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1788 ; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000
1789 ; GISEL-NEXT: v_lshl_b64 v[7:8], s[4:5], v4
1790 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v6
1791 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v7
1792 ; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v8
1793 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v9
1794 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
1795 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1796 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v6
1797 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9
1798 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9
1799 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1800 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
1801 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
1802 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc
1803 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6
1804 ; GISEL-NEXT: v_mul_lo_u32 v13, v11, v6
1805 ; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9
1806 ; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6
1807 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1808 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1809 ; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12
1810 ; GISEL-NEXT: v_mul_lo_u32 v15, v6, v13
1811 ; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12
1812 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1813 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1814 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1815 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1816 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
1817 ; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13
1818 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
1819 ; GISEL-NEXT: v_mul_hi_u32 v16, v6, v13
1820 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1821 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1822 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1823 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1824 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1825 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1826 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1827 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
1828 ; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
1829 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1830 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1831 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
1832 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6
1833 ; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6
1834 ; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9
1835 ; GISEL-NEXT: v_mul_hi_u32 v10, v10, v6
1836 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1837 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1838 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12
1839 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v10
1840 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v12
1841 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1842 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1843 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
1844 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1845 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
1846 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10
1847 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
1848 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10
1849 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1850 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1851 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1852 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1853 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1854 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
1855 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1856 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1857 ; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10
1858 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1859 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
1860 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc
1861 ; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6
1862 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9
1863 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6
1864 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1865 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1866 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1867 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1868 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1869 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
1870 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
1871 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9
1872 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6
1873 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1874 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1875 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1876 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1877 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1878 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1879 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1880 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
1881 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1882 ; GISEL-NEXT: v_mul_lo_u32 v10, v7, v6
1883 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6
1884 ; GISEL-NEXT: v_mul_lo_u32 v9, v7, v9
1885 ; GISEL-NEXT: v_mul_hi_u32 v6, v7, v6
1886 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1887 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
1888 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
1889 ; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], v1, v6, vcc
1890 ; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v6
1891 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v8
1892 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
1893 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
1894 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1895 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v9, v8
1896 ; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v10, s[4:5]
1897 ; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], v0, v7
1898 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
1899 ; GISEL-NEXT: v_subbrev_u32_e64 v11, vcc, 0, v1, s[4:5]
1900 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v8
1901 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
1902 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v7
1903 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
1904 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v11, v8
1905 ; GISEL-NEXT: v_cndmask_b32_e32 v12, v12, v13, vcc
1906 ; GISEL-NEXT: v_sub_i32_e32 v7, vcc, v10, v7
1907 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5]
1908 ; GISEL-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
1909 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v12
1910 ; GISEL-NEXT: v_cndmask_b32_e32 v7, v10, v7, vcc
1911 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v11, v1, vcc
1912 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1913 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v7, vcc
1914 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
1915 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v4
1916 ; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v5
1917 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7
1918 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
1919 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1920 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
1921 ; GISEL-NEXT: v_trunc_f32_e32 v7, v7
1922 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7
1923 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1924 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1925 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4
1926 ; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc
1927 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6
1928 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v6
1929 ; GISEL-NEXT: v_mul_lo_u32 v12, v8, v7
1930 ; GISEL-NEXT: v_mul_hi_u32 v13, v8, v6
1931 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1932 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1933 ; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10
1934 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11
1935 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10
1936 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1937 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1938 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1939 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1940 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1941 ; GISEL-NEXT: v_mul_lo_u32 v13, v7, v11
1942 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
1943 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v11
1944 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
1945 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1946 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1947 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1948 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1949 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1950 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1951 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1952 ; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
1953 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1954 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1955 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
1956 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6
1957 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6
1958 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7
1959 ; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6
1960 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1961 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1962 ; GISEL-NEXT: v_mul_lo_u32 v9, v7, v10
1963 ; GISEL-NEXT: v_mul_lo_u32 v11, v6, v8
1964 ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v10
1965 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1966 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1967 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1968 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1969 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1970 ; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
1971 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
1972 ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
1973 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1974 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1975 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1976 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1977 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1978 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
1979 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1980 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1981 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
1982 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1983 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
1984 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
1985 ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6
1986 ; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7
1987 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v6
1988 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1989 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1990 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1991 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1992 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1993 ; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7
1994 ; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
1995 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v7
1996 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
1997 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1998 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1999 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2000 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
2001 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
2002 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2003 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
2004 ; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7
2005 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
2006 ; GISEL-NEXT: v_mul_lo_u32 v8, v4, v6
2007 ; GISEL-NEXT: v_mul_lo_u32 v9, v5, v6
2008 ; GISEL-NEXT: v_mul_lo_u32 v7, v4, v7
2009 ; GISEL-NEXT: v_mul_hi_u32 v6, v4, v6
2010 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v9, v7
2011 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
2012 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
2013 ; GISEL-NEXT: v_subb_u32_e64 v7, s[4:5], v3, v6, vcc
2014 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v6
2015 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v5
2016 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2017 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
2018 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2019 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v5
2020 ; GISEL-NEXT: v_cndmask_b32_e64 v6, v6, v8, s[4:5]
2021 ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], v2, v4
2022 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
2023 ; GISEL-NEXT: v_subbrev_u32_e64 v9, vcc, 0, v3, s[4:5]
2024 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v9, v5
2025 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc
2026 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v4
2027 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
2028 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v9, v5
2029 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v10, v11, vcc
2030 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
2031 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5]
2032 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v3, vcc
2033 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
2034 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc
2035 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
2036 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
2037 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2038 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
2039 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2041 ; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
2043 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2044 ; CGP-NEXT: v_mov_b32_e32 v8, v0
2045 ; CGP-NEXT: v_mov_b32_e32 v9, v1
2046 ; CGP-NEXT: v_mov_b32_e32 v5, v2
2047 ; CGP-NEXT: v_mov_b32_e32 v7, v3
2048 ; CGP-NEXT: s_mov_b64 s[6:7], 0x1000
2049 ; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4
2050 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3
2051 ; CGP-NEXT: v_mov_b32_e32 v0, 0
2052 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2053 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
2054 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
2055 ; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
2056 ; CGP-NEXT: s_cbranch_execz .LBB8_2
2057 ; CGP-NEXT: ; %bb.1:
2058 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
2059 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
2060 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
2061 ; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc
2062 ; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
2063 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
2064 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
2065 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
2066 ; CGP-NEXT: v_trunc_f32_e32 v1, v1
2067 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
2068 ; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1
2069 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
2070 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v1
2071 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v0
2072 ; CGP-NEXT: v_mul_lo_u32 v13, v10, v0
2073 ; CGP-NEXT: v_mul_hi_u32 v14, v4, v0
2074 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
2075 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v12
2076 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
2077 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
2078 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14
2079 ; CGP-NEXT: v_mul_lo_u32 v14, v0, v11
2080 ; CGP-NEXT: v_mul_lo_u32 v16, v1, v11
2081 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v11
2082 ; CGP-NEXT: v_mul_hi_u32 v11, v1, v11
2083 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
2084 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2085 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
2086 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2087 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
2088 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2089 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
2090 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2091 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
2092 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
2093 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
2094 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2095 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
2096 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
2097 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
2098 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc
2099 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v0
2100 ; CGP-NEXT: v_mul_lo_u32 v10, v10, v0
2101 ; CGP-NEXT: v_mul_hi_u32 v12, v4, v0
2102 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v1
2103 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v11
2104 ; CGP-NEXT: v_mul_hi_u32 v14, v0, v11
2105 ; CGP-NEXT: v_mul_hi_u32 v11, v1, v11
2106 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
2107 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
2108 ; CGP-NEXT: v_mul_lo_u32 v10, v0, v4
2109 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v4
2110 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v4
2111 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
2112 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10
2113 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2114 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
2115 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2116 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
2117 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2118 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
2119 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2120 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10
2121 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
2122 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
2123 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2124 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
2125 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
2126 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
2127 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
2128 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0
2129 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v0
2130 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
2131 ; CGP-NEXT: v_mul_lo_u32 v11, v8, v1
2132 ; CGP-NEXT: v_mul_lo_u32 v12, v9, v1
2133 ; CGP-NEXT: v_mul_hi_u32 v13, v8, v1
2134 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1
2135 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
2136 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2137 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v12, v0
2138 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2139 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
2140 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2141 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
2142 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2143 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
2144 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
2145 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
2146 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2147 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
2148 ; CGP-NEXT: v_mul_lo_u32 v10, v2, v0
2149 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v0
2150 ; CGP-NEXT: v_mul_hi_u32 v0, v2, v0
2151 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
2152 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1
2153 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v11, v1
2154 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
2155 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v10
2156 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc
2157 ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0
2158 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
2159 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2160 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3
2161 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2162 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
2163 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
2164 ; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
2165 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v1, v2
2166 ; CGP-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v0, vcc
2167 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2
2168 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
2169 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
2170 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3
2171 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, vcc
2172 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2
2173 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2174 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3
2175 ; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v11, vcc
2176 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
2177 ; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
2178 ; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v0, vcc
2179 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
2180 ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
2181 ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
2182 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
2183 ; CGP-NEXT: ; implicit-def: $vgpr8
2184 ; CGP-NEXT: .LBB8_2: ; %Flow2
2185 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
2186 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
2187 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
2188 ; CGP-NEXT: s_cbranch_execz .LBB8_4
2189 ; CGP-NEXT: ; %bb.3:
2190 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
2191 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
2192 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
2193 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2194 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
2195 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
2196 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
2197 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
2198 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
2199 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v2
2200 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0
2201 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
2202 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
2203 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2204 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
2205 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
2206 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2207 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2208 ; CGP-NEXT: .LBB8_4:
2209 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
2210 ; CGP-NEXT: v_or_b32_e32 v3, v7, v10
2211 ; CGP-NEXT: v_mov_b32_e32 v2, 0
2212 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2213 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
2214 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
2215 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
2216 ; CGP-NEXT: s_cbranch_execz .LBB8_6
2217 ; CGP-NEXT: ; %bb.5:
2218 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9
2219 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v10
2220 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v9
2221 ; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v10, vcc
2222 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
2223 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
2224 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
2225 ; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
2226 ; CGP-NEXT: v_trunc_f32_e32 v3, v3
2227 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
2228 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
2229 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
2230 ; CGP-NEXT: v_mul_lo_u32 v8, v4, v3
2231 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v2
2232 ; CGP-NEXT: v_mul_lo_u32 v12, v6, v2
2233 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v2
2234 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8
2235 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v11
2236 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
2237 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v11
2238 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
2239 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v8
2240 ; CGP-NEXT: v_mul_lo_u32 v15, v3, v8
2241 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8
2242 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
2243 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
2244 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2245 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
2246 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2247 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
2248 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2249 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
2250 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2251 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
2252 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
2253 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
2254 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2255 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
2256 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
2257 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
2258 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
2259 ; CGP-NEXT: v_mul_lo_u32 v8, v4, v2
2260 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
2261 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v2
2262 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
2263 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v8
2264 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v8
2265 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
2266 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
2267 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
2268 ; CGP-NEXT: v_mul_lo_u32 v6, v2, v4
2269 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v4
2270 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v4
2271 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
2272 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2273 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2274 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
2275 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2276 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
2277 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2278 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
2279 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2280 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2281 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
2282 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
2283 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2284 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
2285 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
2286 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
2287 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc
2288 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v2
2289 ; CGP-NEXT: v_mul_hi_u32 v6, v5, v2
2290 ; CGP-NEXT: v_mul_hi_u32 v2, v7, v2
2291 ; CGP-NEXT: v_mul_lo_u32 v8, v5, v3
2292 ; CGP-NEXT: v_mul_lo_u32 v11, v7, v3
2293 ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3
2294 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3
2295 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
2296 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2297 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
2298 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2299 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
2300 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2301 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
2302 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2303 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
2304 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
2305 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
2306 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2307 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
2308 ; CGP-NEXT: v_mul_lo_u32 v6, v9, v2
2309 ; CGP-NEXT: v_mul_lo_u32 v8, v10, v2
2310 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
2311 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
2312 ; CGP-NEXT: v_mul_lo_u32 v3, v9, v3
2313 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3
2314 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
2315 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
2316 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v2, vcc
2317 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v7, v2
2318 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9
2319 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
2320 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v10
2321 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2322 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc
2323 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10
2324 ; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
2325 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v3, v9
2326 ; CGP-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v2, vcc
2327 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9
2328 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2329 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc
2330 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v10
2331 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
2332 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v6, v9
2333 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
2334 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v7, v10
2335 ; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v8, vcc
2336 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
2337 ; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
2338 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v2, vcc
2339 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
2340 ; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc
2341 ; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
2342 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
2343 ; CGP-NEXT: ; implicit-def: $vgpr5
2344 ; CGP-NEXT: .LBB8_6: ; %Flow
2345 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
2346 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
2347 ; CGP-NEXT: s_cbranch_execz .LBB8_8
2348 ; CGP-NEXT: ; %bb.7:
2349 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9
2350 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
2351 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
2352 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2353 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
2354 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
2355 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
2356 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2357 ; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
2358 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v9
2359 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
2360 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9
2361 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9
2362 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2363 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9
2364 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9
2365 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2366 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2367 ; CGP-NEXT: .LBB8_8:
2368 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
2369 ; CGP-NEXT: s_setpc_b64 s[30:31]
2370 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
2371 %r = urem <2 x i64> %x, %shl.y
; Unsigned remainder of two i64 values that are each masked down to their low
; 24 bits (and with 16777215 == 0xffffff) before the urem is performed.
; The assertions are split by prefix. The GISEL lines expect an integer
; sequence built around v_rcp_iflag_f32 followed by two compare/subtract/select
; correction steps. The CGP lines expect a float sequence (v_rcp_f32,
; v_trunc_f32, v_mad_f32) whose result is masked with 0xffffff again.
; Both variants write 0 to v1 before returning.
2375 define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
2376 ; GISEL-LABEL: v_urem_i64_24bit:
2378 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2379 ; GISEL-NEXT: s_mov_b32 s4, 0xffffff
2380 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
2381 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
2382 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
2383 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
2384 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
2385 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2386 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
2387 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
2388 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
2389 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2390 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
2391 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
2392 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
2393 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
2394 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
2395 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2396 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
2397 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
2398 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2399 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
2400 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2402 ; CGP-LABEL: v_urem_i64_24bit:
2404 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2405 ; CGP-NEXT: s_mov_b32 s4, 0xffffff
2406 ; CGP-NEXT: v_and_b32_e32 v0, s4, v0
2407 ; CGP-NEXT: v_and_b32_e32 v1, s4, v2
2408 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0
2409 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
2410 ; CGP-NEXT: v_rcp_f32_e32 v4, v3
2411 ; CGP-NEXT: v_mul_f32_e32 v4, v2, v4
2412 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2413 ; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2
2414 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2415 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3
2416 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2417 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2
2418 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1
2419 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
2420 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2421 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2422 ; CGP-NEXT: s_setpc_b64 s[30:31]
; Keep only the low 24 bits of each operand, then take the unsigned remainder.
2423 %num.mask = and i64 %num, 16777215
2424 %den.mask = and i64 %den, 16777215
2425 %result = urem i64 %num.mask, %den.mask
2429 define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
2430 ; GISEL-LABEL: v_urem_v2i64_24bit:
2432 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2433 ; GISEL-NEXT: s_mov_b32 s6, 0xffffff
2434 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v7, 0
2435 ; GISEL-NEXT: v_and_b32_e32 v3, s6, v4
2436 ; GISEL-NEXT: v_and_b32_e32 v1, s6, v6
2437 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v3
2438 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v3
2439 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
2440 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v1
2441 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v1
2442 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
2443 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7
2444 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v7
2445 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
2446 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
2447 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
2448 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
2449 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
2450 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
2451 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
2452 ; GISEL-NEXT: v_trunc_f32_e32 v11, v11
2453 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
2454 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
2455 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
2456 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
2457 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
2458 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
2459 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
2460 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
2461 ; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
2462 ; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
2463 ; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
2464 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
2465 ; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
2466 ; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
2467 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
2468 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
2469 ; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17
2470 ; GISEL-NEXT: v_mul_hi_u32 v18, v7, v17
2471 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
2472 ; GISEL-NEXT: v_mul_lo_u32 v19, v7, v13
2473 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
2474 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
2475 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18
2476 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
2477 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v14
2478 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
2479 ; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
2480 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16
2481 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
2482 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16
2483 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
2484 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
2485 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12
2486 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
2487 ; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18
2488 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v12
2489 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14
2490 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
2491 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18
2492 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
2493 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
2494 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2495 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
2496 ; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13
2497 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
2498 ; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
2499 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2500 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
2501 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
2502 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19
2503 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
2504 ; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000
2505 ; GISEL-NEXT: s_bfe_i32 s7, -1, 0x10000
2506 ; GISEL-NEXT: s_bfe_i32 s8, -1, 0x10000
2507 ; GISEL-NEXT: v_and_b32_e32 v0, s6, v0
2508 ; GISEL-NEXT: v_and_b32_e32 v2, s6, v2
2509 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
2510 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2511 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
2512 ; GISEL-NEXT: v_mov_b32_e32 v16, s4
2513 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
2514 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2515 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
2516 ; GISEL-NEXT: v_mov_b32_e32 v19, s5
2517 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2518 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
2519 ; GISEL-NEXT: v_mov_b32_e32 v15, s7
2520 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2521 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
2522 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
2523 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
2524 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
2525 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
2526 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
2527 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
2528 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
2529 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
2530 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
2531 ; GISEL-NEXT: v_mul_hi_u32 v17, v9, v7
2532 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
2533 ; GISEL-NEXT: v_mul_lo_u32 v18, v8, v12
2534 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
2535 ; GISEL-NEXT: v_mul_hi_u32 v5, v6, v12
2536 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2537 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
2538 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
2539 ; GISEL-NEXT: v_mul_lo_u32 v10, v11, v13
2540 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
2541 ; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13
2542 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2543 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
2544 ; GISEL-NEXT: v_mul_lo_u32 v17, v7, v9
2545 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
2546 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2547 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
2548 ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v4
2549 ; GISEL-NEXT: v_mul_lo_u32 v14, v8, v4
2550 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
2551 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
2552 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v10, v5
2553 ; GISEL-NEXT: v_mul_hi_u32 v5, v6, v4
2554 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2555 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
2556 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v9
2557 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12
2558 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2559 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5
2560 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2561 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12
2562 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2563 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
2564 ; GISEL-NEXT: v_mul_hi_u32 v17, v7, v9
2565 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
2566 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2567 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
2568 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2569 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17
2570 ; GISEL-NEXT: v_mov_b32_e32 v18, s8
2571 ; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
2572 ; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9
2573 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
2574 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2575 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
2576 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2577 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
2578 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v14
2579 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
2580 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
2581 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
2582 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
2583 ; GISEL-NEXT: v_mul_lo_u32 v6, 0, v5
2584 ; GISEL-NEXT: v_mul_hi_u32 v8, v0, v5
2585 ; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
2586 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
2587 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v11, v9, vcc
2588 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
2589 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
2590 ; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
2591 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v4
2592 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
2593 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v4
2594 ; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
2595 ; GISEL-NEXT: v_mul_lo_u32 v17, v2, v9
2596 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
2597 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2598 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2599 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v9
2600 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v9
2601 ; GISEL-NEXT: v_mul_hi_u32 v9, 0, v9
2602 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12
2603 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2604 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v13, v5
2605 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2606 ; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7
2607 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2608 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
2609 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2610 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v14
2611 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2612 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2613 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
2614 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2615 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2616 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
2617 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v14
2618 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2619 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
2620 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2621 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12
2622 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2623 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
2624 ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v5
2625 ; GISEL-NEXT: v_mul_lo_u32 v12, 0, v5
2626 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
2627 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2628 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v7
2629 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7
2630 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
2631 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
2632 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v10
2633 ; GISEL-NEXT: v_mul_lo_u32 v4, v3, v4
2634 ; GISEL-NEXT: v_mul_lo_u32 v6, v1, v6
2635 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
2636 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6
2637 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
2638 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v7
2639 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
2640 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc
2641 ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4
2642 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v3
2643 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2644 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11
2645 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
2646 ; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5
2647 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v1
2648 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
2649 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
2650 ; GISEL-NEXT: v_cndmask_b32_e64 v7, v16, v7, s[6:7]
2651 ; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
2652 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
2653 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v15, v9, vcc
2654 ; GISEL-NEXT: v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
2655 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v0, v3
2656 ; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
2657 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v3
2658 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
2659 ; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v1
2660 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
2661 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v1
2662 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
2663 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
2664 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v19, v11, vcc
2665 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v10, v3
2666 ; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v4, vcc
2667 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
2668 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v18, v13, vcc
2669 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v12, v1
2670 ; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v5, vcc
2671 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
2672 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
2673 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
2674 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v12, v1, s[4:5]
2675 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
2676 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
2677 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2678 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v15, s[4:5]
2679 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
2680 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v1, s[4:5]
2681 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
2682 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
2683 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2685 ; CGP-LABEL: v_urem_v2i64_24bit:
2687 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2688 ; CGP-NEXT: s_mov_b32 s6, 0xffffff
2689 ; CGP-NEXT: v_and_b32_e32 v0, s6, v0
2690 ; CGP-NEXT: v_and_b32_e32 v1, s6, v2
2691 ; CGP-NEXT: v_and_b32_e32 v2, s6, v4
2692 ; CGP-NEXT: v_and_b32_e32 v3, s6, v6
2693 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0
2694 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
2695 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v1
2696 ; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
2697 ; CGP-NEXT: v_rcp_f32_e32 v8, v5
2698 ; CGP-NEXT: v_rcp_f32_e32 v9, v7
2699 ; CGP-NEXT: v_mul_f32_e32 v8, v4, v8
2700 ; CGP-NEXT: v_mul_f32_e32 v9, v6, v9
2701 ; CGP-NEXT: v_trunc_f32_e32 v8, v8
2702 ; CGP-NEXT: v_trunc_f32_e32 v9, v9
2703 ; CGP-NEXT: v_mad_f32 v4, -v8, v5, v4
2704 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
2705 ; CGP-NEXT: v_mad_f32 v6, -v9, v7, v6
2706 ; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
2707 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, v5
2708 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2709 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v6|, v7
2710 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
2711 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
2712 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
2713 ; CGP-NEXT: v_mul_lo_u32 v2, v4, v2
2714 ; CGP-NEXT: v_mul_lo_u32 v3, v5, v3
2715 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
2716 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
2717 ; CGP-NEXT: v_and_b32_e32 v0, s6, v0
2718 ; CGP-NEXT: v_and_b32_e32 v2, s6, v1
2719 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2720 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2721 ; CGP-NEXT: s_setpc_b64 s[30:31]
2722 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2723 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2724 %result = urem <2 x i64> %num.mask, %den.mask
2725 ret <2 x i64> %result