1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Test: 64-bit unsigned remainder (urem i64) with %num and %den in VGPRs.
; The expected output below uses the prefix common to both RUN lines, i.e. the
; same code is expected with the AMDGPUCodeGenPrepare idiv expansion disabled
; and enabled.
; Shape of the expected code:
;   * The high halves of %num and %den are OR'ed together and compared with 0
;     to build the exec mask for a divergent branch.
;   * .LBB0_3 handles the lanes where that value is non-zero, using the full
;     64-bit expansion (reciprocal estimate of the divisor, refinement steps,
;     then conditional subtractions of the divisor).
;   * .LBB0_4 handles the remaining lanes with the 32-bit expansion on the low
;     halves only and writes 0 to the high half of the result (v1).
7 define i64 @v_urem_i64(i64 %num, i64 %den) {
8 ; CHECK-LABEL: v_urem_i64:
10 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; CHECK-NEXT: v_mov_b32_e32 v4, v0
12 ; CHECK-NEXT: v_mov_b32_e32 v5, v1
13 ; CHECK-NEXT: v_or_b32_e32 v1, v5, v3
14 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
15 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16 ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2
17 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
18 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
19 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
20 ; CHECK-NEXT: s_cbranch_execnz .LBB0_3
21 ; CHECK-NEXT: ; %bb.1: ; %Flow
22 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
23 ; CHECK-NEXT: s_cbranch_execnz .LBB0_4
24 ; CHECK-NEXT: .LBB0_2:
25 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
26 ; CHECK-NEXT: s_setpc_b64 s[30:31]
27 ; CHECK-NEXT: .LBB0_3:
28 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3
29 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
30 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
31 ; CHECK-NEXT: v_mac_f32_e32 v6, 0x4f800000, v0
32 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6
33 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
34 ; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v0
35 ; CHECK-NEXT: v_trunc_f32_e32 v6, v6
36 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
37 ; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6
38 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
39 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v6
40 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v0
41 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
42 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0
43 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
44 ; CHECK-NEXT: v_mul_lo_u32 v10, v6, v9
45 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
46 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9
47 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
48 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
49 ; CHECK-NEXT: v_mul_lo_u32 v13, v6, v8
50 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
51 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8
52 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
53 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
54 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
55 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
56 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
57 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
58 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
59 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
60 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
61 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
62 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
63 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
64 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
65 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
66 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
67 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc
68 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0
69 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
70 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0
71 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v6
72 ; CHECK-NEXT: v_mul_lo_u32 v10, v6, v8
73 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
74 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8
75 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1
76 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
77 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1
78 ; CHECK-NEXT: v_mul_lo_u32 v9, v6, v1
79 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1
80 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1
81 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
82 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
83 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
84 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
85 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
86 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
87 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
88 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
89 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
90 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
91 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
92 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
93 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
94 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
95 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
96 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc
97 ; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0
98 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
99 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
100 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1
101 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1
102 ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1
103 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
104 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
105 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
106 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
107 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
108 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
109 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
110 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
111 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
112 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
113 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
114 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
115 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
116 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
117 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0
118 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0
119 ; CHECK-NEXT: v_mul_hi_u32 v0, v2, v0
120 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
121 ; CHECK-NEXT: v_mul_lo_u32 v1, v2, v1
122 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
123 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
124 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v7
125 ; CHECK-NEXT: v_subb_u32_e64 v4, s[4:5], v5, v0, vcc
126 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v5, v0
127 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
128 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
129 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3
130 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
131 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
132 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
133 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
134 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, v1, v2
135 ; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
136 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v2
137 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
138 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
139 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v3
140 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
141 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v6, v2
142 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
143 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3
144 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v9, v8, vcc
145 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
146 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
147 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v7, v0, vcc
148 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
149 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
150 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
151 ; CHECK-NEXT: ; implicit-def: $vgpr6
152 ; CHECK-NEXT: ; implicit-def: $vgpr2
153 ; CHECK-NEXT: ; implicit-def: $vgpr4
154 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
155 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
156 ; CHECK-NEXT: .LBB0_4:
157 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6
158 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
159 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
160 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
161 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
162 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
163 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
164 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
165 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, v2
166 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v4, v0
167 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
168 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
169 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
170 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
171 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
172 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
173 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
174 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
175 ; CHECK-NEXT: s_setpc_b64 s[30:31]
176 %result = urem i64 %num, %den
180 ; FIXME: This is a workaround for not handling the uniform VGPR case.
181 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Test: 64-bit unsigned remainder (urem i64) with %num and %den passed in SGPRs
; (amdgpu_ps calling convention, inreg arguments). The result halves are routed
; through llvm.amdgcn.readfirstlane before being returned.
; The expected output below uses the prefix common to both RUN lines.
; Shape of the expected code:
;   * (%num | %den) is masked with s[8:9] = {0, -1}, i.e. only the high 32 bits
;     are kept, and compared with 0; the branch on the result is uniform
;     (s_cbranch_vccz).
;   * %bb.1 is the full 64-bit expansion; it clears s4 so that the Flow block
;     skips the 32-bit path.
;   * %bb.4 is the 32-bit expansion on the low halves (s0, s2).
;   * Only the low result half (v0) is read back; s1 is a copy of s0.
183 define amdgpu_ps i64 @s_urem_i64(i64 inreg %num, i64 inreg %den) {
184 ; CHECK-LABEL: s_urem_i64:
186 ; CHECK-NEXT: s_mov_b32 s4, 1
187 ; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3]
188 ; CHECK-NEXT: s_mov_b32 s8, 0
189 ; CHECK-NEXT: s_mov_b32 s9, -1
190 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[8:9]
191 ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
192 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
193 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2
194 ; CHECK-NEXT: ; %bb.1:
195 ; CHECK-NEXT: v_mov_b32_e32 v0, s3
196 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
197 ; CHECK-NEXT: s_sub_u32 s4, 0, s2
198 ; CHECK-NEXT: v_mov_b32_e32 v3, s1
199 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
200 ; CHECK-NEXT: s_subb_u32 s5, 0, s3
201 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
202 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
203 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1
204 ; CHECK-NEXT: v_trunc_f32_e32 v4, v4
205 ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4
206 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
207 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
208 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4
209 ; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1
210 ; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1
211 ; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1
212 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
213 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6
214 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6
215 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
216 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
217 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5
218 ; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5
219 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5
220 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
221 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
222 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
223 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6
224 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
225 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
226 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
227 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11
228 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
229 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
230 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9
231 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
232 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
233 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
234 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
235 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
236 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
237 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1
238 ; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1
239 ; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1
240 ; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4
241 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5
242 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5
243 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
244 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
245 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
246 ; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6
247 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6
248 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6
249 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
250 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
251 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
252 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5
253 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
254 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10
255 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
256 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11
257 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
258 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
259 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
260 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
261 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
262 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
263 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
264 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5
265 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc
266 ; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1
267 ; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1
268 ; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1
269 ; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4
270 ; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4
271 ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4
272 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4
273 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
274 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
275 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
276 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
277 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
278 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
279 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
280 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
281 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
282 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
283 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5
284 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
285 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
286 ; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1
287 ; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1
288 ; CHECK-NEXT: v_mul_hi_u32 v1, s2, v1
289 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
290 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4
291 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4
292 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v4, v1
293 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, s0, v6
294 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v1, vcc
295 ; CHECK-NEXT: v_sub_i32_e64 v1, s[4:5], s1, v1
296 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v4
297 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
298 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3
299 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
300 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v1, v0, vcc
301 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3
302 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc
303 ; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v4
304 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
305 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3
306 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
307 ; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, s2, v3
308 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0
309 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
310 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0
311 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v7, v5, vcc
312 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
313 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc
314 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
315 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
316 ; CHECK-NEXT: s_mov_b32 s4, 0
317 ; CHECK-NEXT: s_branch .LBB1_3
318 ; CHECK-NEXT: .LBB1_2:
319 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
320 ; CHECK-NEXT: .LBB1_3: ; %Flow
321 ; CHECK-NEXT: s_xor_b32 s1, s4, 1
322 ; CHECK-NEXT: s_and_b32 s1, s1, 1
323 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
324 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
325 ; CHECK-NEXT: ; %bb.4:
326 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
327 ; CHECK-NEXT: s_sub_i32 s1, 0, s2
328 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
329 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
330 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
331 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
332 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
333 ; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
334 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, s2
335 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
336 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
337 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
338 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
339 ; CHECK-NEXT: v_subrev_i32_e32 v1, vcc, s2, v0
340 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v0
341 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
342 ; CHECK-NEXT: .LBB1_5:
343 ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
344 ; CHECK-NEXT: s_mov_b32 s1, s0
345 ; CHECK-NEXT: ; return to shader part epilog
346 %result = urem i64 %num, %den
; Split the 64-bit result into two 32-bit halves so each can be passed through
; readfirstlane.
347 %cast = bitcast i64 %result to <2 x i32>
348 %elt.0 = extractelement <2 x i32> %cast, i32 0
349 %elt.1 = extractelement <2 x i32> %cast, i32 1
350 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
351 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
352 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): lane 1 is filled with %res.0, so %res.1 is unused and the high
; half of the remainder is never observed (the expected output copies s0 into
; s1). This looks like it was meant to be %res.1 - confirm; changing it
; requires regenerating the assertions with utils/update_llc_test_checks.py.
353 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
354 %cast.back = bitcast <2 x i32> %ins.1 to i64
358 define <2 x i64> @v_urem_v2i64(<2 x i64> %num, <2 x i64> %den) {
359 ; GISEL-LABEL: v_urem_v2i64:
361 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
362 ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4
363 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5
364 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4
365 ; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6
366 ; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7
367 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6
368 ; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc
369 ; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5]
370 ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11
371 ; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
372 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10
373 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
374 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10
375 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
376 ; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10
377 ; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11
378 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13
379 ; GISEL-NEXT: v_trunc_f32_e32 v14, v14
380 ; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13
381 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
382 ; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14
383 ; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14
384 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10
385 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13
386 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
387 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14
388 ; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11
389 ; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11
390 ; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11
391 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
392 ; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18
393 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
394 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17
395 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
396 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18
397 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20
398 ; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10
399 ; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10
400 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16
401 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10
402 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
403 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19
404 ; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16
405 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
406 ; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19
407 ; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21
408 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19
409 ; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18
410 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16
411 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
412 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17
413 ; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18
414 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16
415 ; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20
416 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17
417 ; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20
418 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
419 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9]
420 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
421 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11]
422 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15]
423 ; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22
424 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
425 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5]
426 ; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23
427 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13]
428 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17]
429 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24
430 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
431 ; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22
432 ; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19
433 ; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16
434 ; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17
435 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18
436 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
437 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18
438 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
439 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19
440 ; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10
441 ; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10
442 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
443 ; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10
444 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
445 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
446 ; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11
447 ; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
448 ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11
449 ; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
450 ; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20
451 ; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13
452 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8
453 ; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19
454 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14
455 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
456 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20
457 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20
458 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18
459 ; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19
460 ; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19
461 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16
462 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8
463 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
464 ; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8
465 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17
466 ; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8
467 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8
468 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9
469 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
470 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9
471 ; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17
472 ; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9
473 ; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9
474 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
475 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
476 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15
477 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
478 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
479 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12
480 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11]
481 ; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18
482 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
483 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19
484 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
485 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20
486 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
487 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
488 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
489 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
490 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19
491 ; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17
492 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
493 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
494 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
495 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17
496 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18
497 ; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19
498 ; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10
499 ; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10
500 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10
501 ; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11
502 ; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11
503 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11
504 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12
505 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
506 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc
507 ; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
508 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8
509 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8
510 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8
511 ; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8
512 ; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9
513 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9
514 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
515 ; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9
516 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9
517 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10
518 ; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15
519 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11
520 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
521 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
522 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
523 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
524 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
525 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14
526 ; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v19
527 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16
528 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
529 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
530 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
531 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
532 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
533 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14
534 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16
535 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
536 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
537 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15
538 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
539 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
540 ; GISEL-NEXT: v_mul_lo_u32 v16, v4, v10
541 ; GISEL-NEXT: v_mul_lo_u32 v17, v5, v10
542 ; GISEL-NEXT: v_mul_hi_u32 v10, v4, v10
543 ; GISEL-NEXT: v_mul_lo_u32 v18, v6, v11
544 ; GISEL-NEXT: v_mul_lo_u32 v19, v7, v11
545 ; GISEL-NEXT: v_mul_hi_u32 v11, v6, v11
546 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
547 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
548 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16
549 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18
550 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v13
551 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12
552 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v4
553 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v6
554 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v4
555 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v6
556 ; GISEL-NEXT: v_mul_lo_u32 v8, v4, v8
557 ; GISEL-NEXT: v_mul_lo_u32 v9, v6, v9
558 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7]
559 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9]
560 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4
561 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v6
562 ; GISEL-NEXT: v_sub_i32_e64 v4, s[14:15], v12, v4
563 ; GISEL-NEXT: v_sub_i32_e64 v6, s[16:17], v13, v6
564 ; GISEL-NEXT: v_add_i32_e64 v8, s[18:19], v17, v8
565 ; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9
566 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7]
567 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9]
568 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v10
569 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11
570 ; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v8, vcc
571 ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v8
572 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], v3, v9, s[4:5]
573 ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9
574 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v5
575 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
576 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v8, v7
577 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[4:5]
578 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5
579 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v8, v7
580 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
581 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
582 ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11]
583 ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[10:11]
584 ; GISEL-NEXT: v_subbrev_u32_e64 v19, vcc, 0, v3, s[12:13]
585 ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v7, s[12:13]
586 ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5]
587 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9]
588 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v5
589 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
590 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v19, v7
591 ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[6:7], 0, v3, s[16:17]
592 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v5
593 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v19, v7
594 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
595 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
596 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
597 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
598 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7]
599 ; GISEL-NEXT: v_cndmask_b32_e64 v7, v7, v17, s[8:9]
600 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5
601 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v7
602 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[6:7]
603 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v13, v6, s[8:9]
604 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v1, s[6:7]
605 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[8:9]
606 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
607 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[4:5]
608 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc
609 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
610 ; GISEL-NEXT: s_setpc_b64 s[30:31]
612 ; CGP-LABEL: v_urem_v2i64:
614 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615 ; CGP-NEXT: v_mov_b32_e32 v10, v0
616 ; CGP-NEXT: v_mov_b32_e32 v11, v1
617 ; CGP-NEXT: v_mov_b32_e32 v8, v2
618 ; CGP-NEXT: v_mov_b32_e32 v9, v3
619 ; CGP-NEXT: v_or_b32_e32 v1, v11, v5
620 ; CGP-NEXT: v_mov_b32_e32 v0, 0
621 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
622 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4
623 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
624 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
625 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
626 ; CGP-NEXT: s_cbranch_execz .LBB2_2
628 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5
629 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
630 ; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc
631 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0
632 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2
633 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
634 ; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
635 ; CGP-NEXT: v_trunc_f32_e32 v2, v2
636 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
637 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
638 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
639 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v2
640 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v0
641 ; CGP-NEXT: v_mul_lo_u32 v14, v3, v0
642 ; CGP-NEXT: v_mul_hi_u32 v15, v1, v0
643 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
644 ; CGP-NEXT: v_mul_lo_u32 v14, v2, v13
645 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
646 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v13
647 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
648 ; CGP-NEXT: v_mul_lo_u32 v15, v0, v12
649 ; CGP-NEXT: v_mul_lo_u32 v17, v2, v12
650 ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12
651 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12
652 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
653 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
654 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
655 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
656 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
657 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
658 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18
659 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
660 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
661 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
662 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
663 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
664 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
665 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
666 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
667 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc
668 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v0
669 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v0
670 ; CGP-NEXT: v_mul_hi_u32 v13, v1, v0
671 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v2
672 ; CGP-NEXT: v_mul_lo_u32 v14, v2, v12
673 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
674 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12
675 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1
676 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
677 ; CGP-NEXT: v_mul_lo_u32 v3, v0, v1
678 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v1
679 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1
680 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1
681 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
682 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
683 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
684 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
685 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15
686 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
687 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
688 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
689 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
690 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
691 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3
692 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
693 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
694 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12
695 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3
696 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
697 ; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
698 ; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
699 ; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
700 ; CGP-NEXT: v_mul_lo_u32 v12, v10, v1
701 ; CGP-NEXT: v_mul_lo_u32 v13, v11, v1
702 ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1
703 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1
704 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
705 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
706 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0
707 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
708 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
709 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
710 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
711 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
712 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
713 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3
714 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
715 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
716 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
717 ; CGP-NEXT: v_mul_lo_u32 v3, v4, v0
718 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v0
719 ; CGP-NEXT: v_mul_hi_u32 v0, v4, v0
720 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2
721 ; CGP-NEXT: v_mul_lo_u32 v1, v4, v1
722 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
723 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
724 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v3
725 ; CGP-NEXT: v_subb_u32_e64 v2, s[4:5], v11, v0, vcc
726 ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v11, v0
727 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v4
728 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
729 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v5
730 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
731 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
732 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5
733 ; CGP-NEXT: v_cndmask_b32_e32 v3, v10, v3, vcc
734 ; CGP-NEXT: v_sub_i32_e32 v10, vcc, v1, v4
735 ; CGP-NEXT: v_subbrev_u32_e64 v11, s[4:5], 0, v0, vcc
736 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v4
737 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
738 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v5, vcc
739 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
740 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
741 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, v10, v4
742 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
743 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v11, v5
744 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v12, vcc
745 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
746 ; CGP-NEXT: v_cndmask_b32_e32 v4, v10, v4, vcc
747 ; CGP-NEXT: v_cndmask_b32_e32 v5, v11, v0, vcc
748 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
749 ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
750 ; CGP-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
751 ; CGP-NEXT: ; implicit-def: $vgpr2
752 ; CGP-NEXT: ; implicit-def: $vgpr4
753 ; CGP-NEXT: ; implicit-def: $vgpr10
754 ; CGP-NEXT: .LBB2_2: ; %Flow1
755 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
756 ; CGP-NEXT: s_cbranch_execz .LBB2_4
758 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2
759 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
760 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
761 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
762 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
763 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
764 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
765 ; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
766 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v4
767 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v10, v0
768 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
769 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
770 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
771 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v4
772 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
773 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
774 ; CGP-NEXT: v_mov_b32_e32 v1, 0
776 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
777 ; CGP-NEXT: v_or_b32_e32 v3, v9, v7
778 ; CGP-NEXT: v_mov_b32_e32 v2, 0
779 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
780 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
781 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
782 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
783 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
784 ; CGP-NEXT: s_cbranch_execnz .LBB2_7
785 ; CGP-NEXT: ; %bb.5: ; %Flow
786 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
787 ; CGP-NEXT: s_cbranch_execnz .LBB2_8
789 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
790 ; CGP-NEXT: s_setpc_b64 s[30:31]
792 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v7
793 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
794 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc
795 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2
796 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
797 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
798 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2
799 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
800 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
801 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
802 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
803 ; CGP-NEXT: v_mul_lo_u32 v10, v3, v4
804 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v2
805 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v2
806 ; CGP-NEXT: v_mul_hi_u32 v13, v3, v2
807 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
808 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v11
809 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
810 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v11
811 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
812 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v10
813 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v10
814 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10
815 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10
816 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
817 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
818 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
819 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
820 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
821 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
822 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
823 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
824 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
825 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
826 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
827 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
828 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
829 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
830 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
831 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc
832 ; CGP-NEXT: v_mul_lo_u32 v10, v3, v2
833 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
834 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v2
835 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
836 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v10
837 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v10
838 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10
839 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
840 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
841 ; CGP-NEXT: v_mul_lo_u32 v5, v2, v3
842 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v3
843 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3
844 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3
845 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
846 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
847 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
848 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
849 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
850 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
851 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
852 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
853 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
854 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
855 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
856 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
857 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
858 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10
859 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
860 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc
861 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v2
862 ; CGP-NEXT: v_mul_hi_u32 v5, v8, v2
863 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
864 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v3
865 ; CGP-NEXT: v_mul_lo_u32 v11, v9, v3
866 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3
867 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3
868 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
869 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
870 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
871 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
872 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
873 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
874 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
875 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
876 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
877 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
878 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
879 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
880 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
881 ; CGP-NEXT: v_mul_lo_u32 v5, v6, v2
882 ; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
883 ; CGP-NEXT: v_mul_hi_u32 v2, v6, v2
884 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
885 ; CGP-NEXT: v_mul_lo_u32 v3, v6, v3
886 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v10, v3
887 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
888 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v5
889 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v2, vcc
890 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v9, v2
891 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v6
892 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
893 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v7
894 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
895 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc
896 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7
897 ; CGP-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc
898 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v3, v6
899 ; CGP-NEXT: v_subbrev_u32_e64 v9, s[4:5], 0, v2, vcc
900 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v6
901 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
902 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v7, vcc
903 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v9, v7
904 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
905 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
906 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
907 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v9, v7
908 ; CGP-NEXT: v_cndmask_b32_e32 v7, v11, v10, vcc
909 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
910 ; CGP-NEXT: v_cndmask_b32_e32 v6, v8, v6, vcc
911 ; CGP-NEXT: v_cndmask_b32_e32 v7, v9, v2, vcc
912 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
913 ; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc
914 ; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
915 ; CGP-NEXT: ; implicit-def: $vgpr4
916 ; CGP-NEXT: ; implicit-def: $vgpr6
917 ; CGP-NEXT: ; implicit-def: $vgpr8
918 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
919 ; CGP-NEXT: s_cbranch_execz .LBB2_6
921 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
922 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
923 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
924 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
925 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
926 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
927 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
928 ; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
929 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v6
930 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
931 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
932 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
933 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
934 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v6
935 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
936 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
937 ; CGP-NEXT: v_mov_b32_e32 v3, 0
938 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
939 ; CGP-NEXT: s_setpc_b64 s[30:31]
940 %result = urem <2 x i64> %num, %den
941 ret <2 x i64> %result
; Test: unsigned remainder of an i64 by the constant 4096 (0x1000, a power of
; two). The expected code contains no division sequence: v0 is masked with
; 0xfff (4096 - 1) and v1 is set to 0 before returning.
; The assertions use the shared CHECK prefix, so both llc invocations in the
; file header must produce this same output.
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
944 define i64 @v_urem_i64_pow2k_denom(i64 %num) {
945 ; CHECK-LABEL: v_urem_i64_pow2k_denom:
947 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948 ; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
949 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
950 ; CHECK-NEXT: s_setpc_b64 s[30:31]
951 %result = urem i64 %num, 4096
; Test: unsigned remainder of a <2 x i64> by the splat constant 4096 (0x1000,
; a power of two). The expected code masks v0 and v2 with 0xfff (4096 - 1) and
; sets v1 and v3 to 0; no division sequence is emitted for either element.
; The assertions use the shared CHECK prefix, so both llc invocations in the
; file header must produce this same output.
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
955 define <2 x i64> @v_urem_v2i64_pow2k_denom(<2 x i64> %num) {
956 ; CHECK-LABEL: v_urem_v2i64_pow2k_denom:
958 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
959 ; CHECK-NEXT: v_and_b32_e32 v0, 0xfff, v0
960 ; CHECK-NEXT: v_and_b32_e32 v2, 0xfff, v2
961 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
962 ; CHECK-NEXT: v_mov_b32_e32 v3, 0
963 ; CHECK-NEXT: s_setpc_b64 s[30:31]
964 %result = urem <2 x i64> %num, <i64 4096, i64 4096>
965 ret <2 x i64> %result
; Test: unsigned remainder of an i64 by the odd constant 1235195 (0x12d8fb).
; The expected output is not a simple mask. It is a long straight-line sequence
; (no branches appear in the assertions) built from a float reciprocal
; (v_rcp_iflag_f32), 32-bit v_mul_lo/v_mul_hi products combined with
; carry-tracking adds, and closing v_cmp/v_cndmask selects.
; The divisor appears as 0x12d8fb (s4 and v2); 0xffed2705 (s5) is its 32-bit
; two's-complement negation.
; The assertions use the shared CHECK prefix, so both llc invocations in the
; file header must produce this same output.
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with utils/update_llc_test_checks.py, do not hand-edit.
968 define i64 @v_urem_i64_oddk_denom(i64 %num) {
969 ; CHECK-LABEL: v_urem_i64_oddk_denom:
971 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; CHECK-NEXT: s_mov_b32 s4, 0x12d8fb
973 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x12d8fb
974 ; CHECK-NEXT: v_cvt_f32_u32_e32 v3, 0x12d8fb
975 ; CHECK-NEXT: v_cvt_f32_ubyte0_e32 v4, 0
976 ; CHECK-NEXT: s_mov_b32 s5, 0xffed2705
977 ; CHECK-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
978 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v3, v3
979 ; CHECK-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
980 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
981 ; CHECK-NEXT: v_trunc_f32_e32 v4, v4
982 ; CHECK-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
983 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
984 ; CHECK-NEXT: v_cvt_u32_f32_e32 v3, v3
985 ; CHECK-NEXT: v_mul_lo_u32 v5, v4, s5
986 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, s5
987 ; CHECK-NEXT: v_mul_hi_u32 v7, s5, v3
988 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v5, v3
989 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
990 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6
991 ; CHECK-NEXT: v_mul_hi_u32 v8, v3, v6
992 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
993 ; CHECK-NEXT: v_mul_lo_u32 v9, v3, v5
994 ; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5
995 ; CHECK-NEXT: v_mul_hi_u32 v11, v3, v5
996 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
997 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
998 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
999 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6
1000 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1001 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
1002 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1003 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11
1004 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1005 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1006 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1007 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
1008 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1009 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1010 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1011 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v6
1012 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
1013 ; CHECK-NEXT: v_mul_lo_u32 v5, v3, s5
1014 ; CHECK-NEXT: v_mul_hi_u32 v6, s5, v3
1015 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, s5
1016 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v5
1017 ; CHECK-NEXT: v_mul_hi_u32 v9, v3, v5
1018 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
1019 ; CHECK-NEXT: v_sub_i32_e32 v7, vcc, v7, v3
1020 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
1021 ; CHECK-NEXT: v_mul_lo_u32 v7, v3, v6
1022 ; CHECK-NEXT: v_mul_lo_u32 v10, v4, v6
1023 ; CHECK-NEXT: v_mul_hi_u32 v11, v3, v6
1024 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
1025 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1026 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1027 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v10, v5
1028 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1029 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1030 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1031 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11
1032 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1033 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1034 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9
1035 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1036 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1037 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1038 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
1039 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1040 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc
1041 ; CHECK-NEXT: v_mul_lo_u32 v5, v1, v3
1042 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v3
1043 ; CHECK-NEXT: v_mul_hi_u32 v3, v1, v3
1044 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4
1045 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v4
1046 ; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4
1047 ; CHECK-NEXT: v_mul_hi_u32 v4, v1, v4
1048 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
1049 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1050 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v8, v3
1051 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1052 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
1053 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1054 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v9
1055 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1056 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
1057 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1058 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1059 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1060 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
1061 ; CHECK-NEXT: v_mul_lo_u32 v6, v3, s4
1062 ; CHECK-NEXT: v_mul_hi_u32 v3, s4, v3
1063 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1064 ; CHECK-NEXT: v_mul_lo_u32 v4, v4, s4
1065 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v4, v3
1066 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v6
1067 ; CHECK-NEXT: v_subb_u32_e64 v4, vcc, v1, v3, s[4:5]
1068 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
1069 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
1070 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
1071 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v0, v2
1072 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v4
1073 ; CHECK-NEXT: v_cndmask_b32_e64 v3, -1, v3, s[6:7]
1074 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1075 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v2
1076 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1077 ; CHECK-NEXT: s_mov_b64 s[4:5], vcc
1078 ; CHECK-NEXT: v_subrev_i32_e32 v6, vcc, 0x12d8fb, v5
1079 ; CHECK-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1080 ; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
1081 ; CHECK-NEXT: v_cndmask_b32_e64 v2, -1, v2, s[4:5]
1082 ; CHECK-NEXT: v_subbrev_u32_e32 v7, vcc, 0, v1, vcc
1083 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1084 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v6, vcc
1085 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1086 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1087 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1088 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v4, v1, vcc
1089 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1090 %result = urem i64 %num, 1235195
1094 define <2 x i64> @v_urem_v2i64_oddk_denom(<2 x i64> %num) {
1095 ; GISEL-LABEL: v_urem_v2i64_oddk_denom:
1097 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1098 ; GISEL-NEXT: s_mov_b32 s4, 0x12d8fb
1099 ; GISEL-NEXT: v_mov_b32_e32 v4, 0x12d8fb
1100 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
1101 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
1102 ; GISEL-NEXT: s_sub_u32 s5, 0, 0x12d8fb
1103 ; GISEL-NEXT: v_madmk_f32 v7, v6, 0x4f800000, v5
1104 ; GISEL-NEXT: s_subb_u32 s6, 0, 0
1105 ; GISEL-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
1106 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
1107 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v5, v5
1108 ; GISEL-NEXT: s_sub_u32 s7, 0, 0x12d8fb
1109 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1110 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
1111 ; GISEL-NEXT: s_subb_u32 s8, 0, 0
1112 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
1113 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v5
1114 ; GISEL-NEXT: v_trunc_f32_e32 v7, v7
1115 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
1116 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7
1117 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1118 ; GISEL-NEXT: v_mac_f32_e32 v5, 0xcf800000, v8
1119 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
1120 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1121 ; GISEL-NEXT: v_mul_lo_u32 v9, s5, v7
1122 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
1123 ; GISEL-NEXT: v_mul_lo_u32 v10, s7, v8
1124 ; GISEL-NEXT: v_mul_lo_u32 v11, s5, v6
1125 ; GISEL-NEXT: v_mul_lo_u32 v12, s6, v6
1126 ; GISEL-NEXT: v_mul_hi_u32 v13, s5, v6
1127 ; GISEL-NEXT: v_mul_lo_u32 v14, s7, v5
1128 ; GISEL-NEXT: v_mul_lo_u32 v15, s8, v5
1129 ; GISEL-NEXT: v_mul_hi_u32 v16, s7, v5
1130 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
1131 ; GISEL-NEXT: v_mul_lo_u32 v12, v7, v11
1132 ; GISEL-NEXT: v_mul_hi_u32 v17, v6, v11
1133 ; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
1134 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
1135 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
1136 ; GISEL-NEXT: v_mul_hi_u32 v18, v5, v14
1137 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
1138 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
1139 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16
1140 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v9
1141 ; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9
1142 ; GISEL-NEXT: v_mul_hi_u32 v19, v6, v9
1143 ; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
1144 ; GISEL-NEXT: v_mul_lo_u32 v20, v5, v10
1145 ; GISEL-NEXT: v_mul_lo_u32 v21, v8, v10
1146 ; GISEL-NEXT: v_mul_hi_u32 v22, v5, v10
1147 ; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
1148 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1149 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1150 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
1151 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1152 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
1153 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1154 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v21, v14
1155 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1156 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1157 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1158 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v19
1159 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1160 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18
1161 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1162 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
1163 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1164 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1165 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v17
1166 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v15
1167 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
1168 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1169 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1170 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1171 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1172 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1173 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v15
1174 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1175 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
1176 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
1177 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc
1178 ; GISEL-NEXT: v_mul_lo_u32 v9, s5, v6
1179 ; GISEL-NEXT: v_mul_lo_u32 v11, s6, v6
1180 ; GISEL-NEXT: v_mul_hi_u32 v12, s5, v6
1181 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v14
1182 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
1183 ; GISEL-NEXT: v_mul_lo_u32 v10, s7, v5
1184 ; GISEL-NEXT: v_mul_lo_u32 v13, s8, v5
1185 ; GISEL-NEXT: v_mul_hi_u32 v14, s7, v5
1186 ; GISEL-NEXT: v_mul_lo_u32 v15, s5, v7
1187 ; GISEL-NEXT: v_mul_lo_u32 v16, v7, v9
1188 ; GISEL-NEXT: v_mul_hi_u32 v17, v6, v9
1189 ; GISEL-NEXT: v_mul_hi_u32 v9, v7, v9
1190 ; GISEL-NEXT: v_mul_lo_u32 v18, s7, v8
1191 ; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10
1192 ; GISEL-NEXT: v_mul_hi_u32 v20, v5, v10
1193 ; GISEL-NEXT: v_mul_hi_u32 v10, v8, v10
1194 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v15
1195 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
1196 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1197 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v14
1198 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11
1199 ; GISEL-NEXT: v_mul_lo_u32 v14, v7, v11
1200 ; GISEL-NEXT: v_mul_hi_u32 v15, v6, v11
1201 ; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
1202 ; GISEL-NEXT: v_mul_lo_u32 v18, v5, v12
1203 ; GISEL-NEXT: v_mul_lo_u32 v21, v8, v12
1204 ; GISEL-NEXT: v_mul_hi_u32 v22, v5, v12
1205 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
1206 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
1207 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1208 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v14, v9
1209 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1210 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
1211 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1212 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v21, v10
1213 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1214 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
1215 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1216 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v15
1217 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1218 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v20
1219 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1220 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v22
1221 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1222 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v16, v13
1223 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1224 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v19, v17
1225 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v21, v18
1226 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
1227 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1228 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
1229 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1230 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1231 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v16, v15
1232 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1233 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1234 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
1235 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
1236 ; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
1237 ; GISEL-NEXT: v_mul_hi_u32 v11, v0, v6
1238 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
1239 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
1240 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
1241 ; GISEL-NEXT: v_mul_lo_u32 v10, v3, v5
1242 ; GISEL-NEXT: v_mul_hi_u32 v12, v2, v5
1243 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
1244 ; GISEL-NEXT: v_mul_lo_u32 v13, v0, v7
1245 ; GISEL-NEXT: v_mul_lo_u32 v14, v1, v7
1246 ; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7
1247 ; GISEL-NEXT: v_mul_hi_u32 v7, v1, v7
1248 ; GISEL-NEXT: v_mul_lo_u32 v16, v2, v8
1249 ; GISEL-NEXT: v_mul_lo_u32 v17, v3, v8
1250 ; GISEL-NEXT: v_mul_hi_u32 v18, v2, v8
1251 ; GISEL-NEXT: v_mul_hi_u32 v8, v3, v8
1252 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
1253 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1254 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v14, v6
1255 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1256 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v16
1257 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1258 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v17, v5
1259 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1260 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1261 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1262 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v15
1263 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1264 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1265 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1266 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v18
1267 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1268 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1269 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v14, v11
1270 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v16, v10
1271 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v12
1272 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
1273 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1274 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
1275 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1276 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1277 ; GISEL-NEXT: v_mul_lo_u32 v11, v6, s4
1278 ; GISEL-NEXT: v_mul_hi_u32 v6, s4, v6
1279 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
1280 ; GISEL-NEXT: v_mul_lo_u32 v12, v5, s4
1281 ; GISEL-NEXT: v_mul_hi_u32 v5, s4, v5
1282 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1283 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1284 ; GISEL-NEXT: v_mul_lo_u32 v7, v7, s4
1285 ; GISEL-NEXT: v_mul_lo_u32 v8, v8, s4
1286 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v7, v6
1287 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v8, v5
1288 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
1289 ; GISEL-NEXT: v_subb_u32_e64 v7, vcc, v1, v6, s[4:5]
1290 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v1, v6
1291 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
1292 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
1293 ; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12
1294 ; GISEL-NEXT: v_subb_u32_e64 v8, vcc, v3, v5, s[6:7]
1295 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
1296 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
1297 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1298 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, v2, v4
1299 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7
1300 ; GISEL-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[8:9]
1301 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1302 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
1303 ; GISEL-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[4:5]
1304 ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
1305 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
1306 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1307 ; GISEL-NEXT: s_mov_b64 s[4:5], vcc
1308 ; GISEL-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
1309 ; GISEL-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4
1310 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
1311 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4
1312 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
1313 ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
1314 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
1315 ; GISEL-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5]
1316 ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4
1317 ; GISEL-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
1318 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1319 ; GISEL-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
1320 ; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
1321 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
1322 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
1323 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10
1324 ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5]
1325 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
1326 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1327 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1328 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
1329 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v5
1330 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5]
1331 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
1332 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
1333 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1335 ; CGP-LABEL: v_urem_v2i64_oddk_denom:
1337 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1338 ; CGP-NEXT: s_mov_b32 s4, 0x12d8fb
1339 ; CGP-NEXT: v_mov_b32_e32 v4, 0x12d8fb
1340 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, 0x12d8fb
1341 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
1342 ; CGP-NEXT: s_mov_b32 s5, 0xffed2705
1343 ; CGP-NEXT: v_cvt_f32_u32_e32 v7, 0x12d8fb
1344 ; CGP-NEXT: v_cvt_f32_ubyte0_e32 v8, 0
1345 ; CGP-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
1346 ; CGP-NEXT: v_mac_f32_e32 v7, 0x4f800000, v8
1347 ; CGP-NEXT: v_rcp_iflag_f32_e32 v5, v5
1348 ; CGP-NEXT: v_rcp_iflag_f32_e32 v6, v7
1349 ; CGP-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
1350 ; CGP-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1351 ; CGP-NEXT: v_mul_f32_e32 v7, 0x2f800000, v5
1352 ; CGP-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
1353 ; CGP-NEXT: v_trunc_f32_e32 v7, v7
1354 ; CGP-NEXT: v_trunc_f32_e32 v8, v8
1355 ; CGP-NEXT: v_mac_f32_e32 v5, 0xcf800000, v7
1356 ; CGP-NEXT: v_cvt_u32_f32_e32 v7, v7
1357 ; CGP-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
1358 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
1359 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
1360 ; CGP-NEXT: v_mul_lo_u32 v9, v7, s5
1361 ; CGP-NEXT: v_cvt_u32_f32_e32 v6, v6
1362 ; CGP-NEXT: v_mul_lo_u32 v10, v8, s5
1363 ; CGP-NEXT: v_mul_lo_u32 v11, v5, s5
1364 ; CGP-NEXT: v_mul_hi_u32 v12, s5, v5
1365 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v9, v5
1366 ; CGP-NEXT: v_mul_lo_u32 v13, v6, s5
1367 ; CGP-NEXT: v_mul_hi_u32 v14, s5, v6
1368 ; CGP-NEXT: v_sub_i32_e32 v10, vcc, v10, v6
1369 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1370 ; CGP-NEXT: v_mul_lo_u32 v12, v7, v11
1371 ; CGP-NEXT: v_mul_hi_u32 v15, v5, v11
1372 ; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
1373 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1374 ; CGP-NEXT: v_mul_lo_u32 v14, v8, v13
1375 ; CGP-NEXT: v_mul_hi_u32 v16, v6, v13
1376 ; CGP-NEXT: v_mul_hi_u32 v13, v8, v13
1377 ; CGP-NEXT: v_mul_lo_u32 v17, v5, v9
1378 ; CGP-NEXT: v_mul_lo_u32 v18, v7, v9
1379 ; CGP-NEXT: v_mul_hi_u32 v19, v5, v9
1380 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
1381 ; CGP-NEXT: v_mul_lo_u32 v20, v6, v10
1382 ; CGP-NEXT: v_mul_lo_u32 v21, v8, v10
1383 ; CGP-NEXT: v_mul_hi_u32 v22, v6, v10
1384 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v10
1385 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1386 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1387 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v18, v11
1388 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1389 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v20
1390 ; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1391 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v21, v13
1392 ; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1393 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
1394 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1395 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v19
1396 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1397 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1398 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1399 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v22
1400 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1401 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
1402 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v18, v15
1403 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v20, v14
1404 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v16
1405 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1406 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1407 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1408 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1409 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1410 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v14
1411 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1412 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1413 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v11
1414 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v9, vcc
1415 ; CGP-NEXT: v_mul_lo_u32 v9, v5, s5
1416 ; CGP-NEXT: v_mul_hi_u32 v11, s5, v5
1417 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
1418 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v10, vcc
1419 ; CGP-NEXT: v_mul_lo_u32 v10, v6, s5
1420 ; CGP-NEXT: v_mul_hi_u32 v12, s5, v6
1421 ; CGP-NEXT: v_mul_lo_u32 v13, v7, s5
1422 ; CGP-NEXT: v_mul_lo_u32 v14, v7, v9
1423 ; CGP-NEXT: v_mul_hi_u32 v15, v5, v9
1424 ; CGP-NEXT: v_mul_hi_u32 v9, v7, v9
1425 ; CGP-NEXT: v_mul_lo_u32 v16, v8, s5
1426 ; CGP-NEXT: v_mul_lo_u32 v17, v8, v10
1427 ; CGP-NEXT: v_mul_hi_u32 v18, v6, v10
1428 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v10
1429 ; CGP-NEXT: v_sub_i32_e32 v13, vcc, v13, v5
1430 ; CGP-NEXT: v_sub_i32_e32 v16, vcc, v16, v6
1431 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
1432 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
1433 ; CGP-NEXT: v_mul_lo_u32 v13, v5, v11
1434 ; CGP-NEXT: v_mul_lo_u32 v16, v7, v11
1435 ; CGP-NEXT: v_mul_hi_u32 v19, v5, v11
1436 ; CGP-NEXT: v_mul_hi_u32 v11, v7, v11
1437 ; CGP-NEXT: v_mul_lo_u32 v20, v6, v12
1438 ; CGP-NEXT: v_mul_lo_u32 v21, v8, v12
1439 ; CGP-NEXT: v_mul_hi_u32 v22, v6, v12
1440 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v12
1441 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1442 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1443 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v16, v9
1444 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1445 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v20
1446 ; CGP-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1447 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v21, v10
1448 ; CGP-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1449 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1450 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1451 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v19
1452 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1453 ; CGP-NEXT: v_add_i32_e32 v17, vcc, v17, v18
1454 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1455 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v22
1456 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1457 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1458 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
1459 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v20, v17
1460 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v21, v18
1461 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
1462 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1463 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v15
1464 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1465 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1466 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
1467 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1468 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1469 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
1470 ; CGP-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
1471 ; CGP-NEXT: v_mul_lo_u32 v9, v1, v5
1472 ; CGP-NEXT: v_mul_hi_u32 v11, v0, v5
1473 ; CGP-NEXT: v_mul_hi_u32 v5, v1, v5
1474 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1475 ; CGP-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
1476 ; CGP-NEXT: v_mul_lo_u32 v10, v3, v6
1477 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v6
1478 ; CGP-NEXT: v_mul_hi_u32 v6, v3, v6
1479 ; CGP-NEXT: v_mul_lo_u32 v13, v0, v7
1480 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v7
1481 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v7
1482 ; CGP-NEXT: v_mul_hi_u32 v7, v1, v7
1483 ; CGP-NEXT: v_mul_lo_u32 v16, v2, v8
1484 ; CGP-NEXT: v_mul_lo_u32 v17, v3, v8
1485 ; CGP-NEXT: v_mul_hi_u32 v18, v2, v8
1486 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
1487 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v13
1488 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1489 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v14, v5
1490 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1491 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v16
1492 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1493 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v17, v6
1494 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1495 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1496 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1497 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v15
1498 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1499 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1500 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1501 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v18
1502 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1503 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1504 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v14, v11
1505 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v16, v10
1506 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v17, v12
1507 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v9
1508 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1509 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1510 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1511 ; CGP-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1512 ; CGP-NEXT: v_mul_lo_u32 v11, v5, s4
1513 ; CGP-NEXT: v_mul_hi_u32 v5, s4, v5
1514 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
1515 ; CGP-NEXT: v_mul_lo_u32 v12, v6, s4
1516 ; CGP-NEXT: v_mul_hi_u32 v6, s4, v6
1517 ; CGP-NEXT: v_add_i32_e32 v7, vcc, v7, v9
1518 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1519 ; CGP-NEXT: v_mul_lo_u32 v7, v7, s4
1520 ; CGP-NEXT: v_mul_lo_u32 v8, v8, s4
1521 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v7, v5
1522 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1523 ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v11
1524 ; CGP-NEXT: v_subb_u32_e64 v7, vcc, v1, v5, s[4:5]
1525 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v5
1526 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
1527 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1528 ; CGP-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v12
1529 ; CGP-NEXT: v_subb_u32_e64 v8, vcc, v3, v6, s[6:7]
1530 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v6
1531 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
1532 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
1533 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v2, v4
1534 ; CGP-NEXT: v_cmp_eq_u32_e64 s[8:9], 0, v7
1535 ; CGP-NEXT: v_cndmask_b32_e64 v5, -1, v5, s[8:9]
1536 ; CGP-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[4:5]
1537 ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
1538 ; CGP-NEXT: v_cndmask_b32_e64 v6, -1, v6, s[4:5]
1539 ; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[6:7]
1540 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v4
1541 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1542 ; CGP-NEXT: s_mov_b64 s[4:5], vcc
1543 ; CGP-NEXT: v_subrev_i32_e32 v11, vcc, 0x12d8fb, v9
1544 ; CGP-NEXT: v_sub_i32_e64 v12, s[6:7], v0, v4
1545 ; CGP-NEXT: v_subbrev_u32_e64 v1, s[6:7], 0, v1, s[6:7]
1546 ; CGP-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v4
1547 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
1548 ; CGP-NEXT: v_subbrev_u32_e64 v3, s[4:5], 0, v3, s[4:5]
1549 ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1
1550 ; CGP-NEXT: v_cndmask_b32_e64 v13, -1, v13, s[4:5]
1551 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v12, v4
1552 ; CGP-NEXT: v_subbrev_u32_e64 v14, s[4:5], 0, v1, s[4:5]
1553 ; CGP-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v3
1554 ; CGP-NEXT: v_cndmask_b32_e64 v10, -1, v10, s[4:5]
1555 ; CGP-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v3, vcc
1556 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
1557 ; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v4, vcc
1558 ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v10
1559 ; CGP-NEXT: v_cndmask_b32_e64 v9, v9, v11, s[4:5]
1560 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v14, vcc
1561 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
1562 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1563 ; CGP-NEXT: v_cndmask_b32_e64 v3, v3, v15, s[4:5]
1564 ; CGP-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6
1565 ; CGP-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[4:5]
1566 ; CGP-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc
1567 ; CGP-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
1568 ; CGP-NEXT: s_setpc_b64 s[30:31]
1569 %result = urem <2 x i64> %num, <i64 1235195, i64 1235195>
1570 ret <2 x i64> %result
; Scalar i64 unsigned remainder whose divisor is built at run time as
; (4096 << %y).
;
; The assertions below use the common CHECK prefix, so the same output is
; expected from both llc invocations at the top of this file (idiv expansion
; in AMDGPUCodeGenPrepare disabled and enabled).
;
; Shape of the expected code, as visible in the assertions:
;   * entry: the divisor is formed with v_lshl_b64, the high dwords of the
;     numerator and the divisor are OR'ed together, and that value is
;     compared against zero to choose between two paths via exec masking.
;   * .LBB7_3 (OR of the high dwords != 0): the long 64-bit expansion built
;     from a float reciprocal estimate followed by integer mul/add
;     refinement and conditional subtracts.
;   * .LBB7_4 (both high dwords == 0): the short 32-bit expansion on the low
;     dwords only; the high dword of the result is set to 0.
; No AND with (divisor - 1) appears in the expected output, i.e. the
; shifted-constant divisor still goes through the general expansion here.
;
; These assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1573 define i64 @v_urem_i64_pow2_shl_denom(i64 %x, i64 %y) {
1574 ; CHECK-LABEL: v_urem_i64_pow2_shl_denom:
1576 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1577 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
1578 ; CHECK-NEXT: v_mov_b32_e32 v4, v1
1579 ; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000
1580 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1581 ; CHECK-NEXT: v_mov_b32_e32 v7, 0
1582 ; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2
1583 ; CHECK-NEXT: v_or_b32_e32 v8, v4, v6
1584 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8]
1585 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5
1586 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
1587 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
1588 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1589 ; CHECK-NEXT: s_cbranch_execnz .LBB7_3
1590 ; CHECK-NEXT: ; %bb.1: ; %Flow
1591 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
1592 ; CHECK-NEXT: s_cbranch_execnz .LBB7_4
1593 ; CHECK-NEXT: .LBB7_2:
1594 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
1595 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1596 ; CHECK-NEXT: .LBB7_3:
1597 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v6
1598 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1599 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v6, vcc
1600 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0
1601 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
1602 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1603 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
1604 ; CHECK-NEXT: v_trunc_f32_e32 v2, v2
1605 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
1606 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
1607 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1608 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v2
1609 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v0
1610 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
1611 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0
1612 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1613 ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v9
1614 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
1615 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v9
1616 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
1617 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
1618 ; CHECK-NEXT: v_mul_lo_u32 v13, v2, v8
1619 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
1620 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8
1621 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1622 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1623 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1624 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1625 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1626 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1627 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
1628 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1629 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1630 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
1631 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1632 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1633 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1634 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1635 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
1636 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc
1637 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0
1638 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
1639 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0
1640 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
1641 ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v8
1642 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
1643 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8
1644 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1
1645 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
1646 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1
1647 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v1
1648 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1
1649 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
1650 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1651 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1652 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1653 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1654 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
1655 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1656 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1657 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1658 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1659 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1660 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1661 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1662 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1663 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
1664 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1665 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1666 ; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
1667 ; CHECK-NEXT: v_mul_hi_u32 v7, v3, v0
1668 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
1669 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v1
1670 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1
1671 ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1
1672 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
1673 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1674 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1675 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
1676 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1677 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1678 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1679 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
1680 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1681 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1682 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1683 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1684 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1685 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1686 ; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0
1687 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
1688 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
1689 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1690 ; CHECK-NEXT: v_mul_lo_u32 v1, v5, v1
1691 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
1692 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v1, v0
1693 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v7
1694 ; CHECK-NEXT: v_subb_u32_e64 v2, s[4:5], v4, v0, vcc
1695 ; CHECK-NEXT: v_sub_i32_e64 v0, s[4:5], v4, v0
1696 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v5
1697 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
1698 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
1699 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1700 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc
1701 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6
1702 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v4, v3, vcc
1703 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v1, v5
1704 ; CHECK-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v0, vcc
1705 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v5
1706 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1707 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v0, v6, vcc
1708 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v7, v6
1709 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, vcc
1710 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, v4, v5
1711 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
1712 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6
1713 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v8, vcc
1714 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1715 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
1716 ; CHECK-NEXT: v_cndmask_b32_e32 v5, v7, v0, vcc
1717 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1718 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v4, vcc
1719 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v2, v5, vcc
1720 ; CHECK-NEXT: ; implicit-def: $vgpr2
1721 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
1722 ; CHECK-NEXT: ; implicit-def: $vgpr3
1723 ; CHECK-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
1724 ; CHECK-NEXT: s_cbranch_execz .LBB7_2
1725 ; CHECK-NEXT: .LBB7_4:
1726 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
1727 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1728 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1729 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1730 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
1731 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
1732 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1733 ; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1734 ; CHECK-NEXT: v_mul_lo_u32 v0, v0, v5
1735 ; CHECK-NEXT: v_sub_i32_e32 v0, vcc, v3, v0
1736 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v5
1737 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
1738 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1739 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v0, v5
1740 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5
1741 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
1742 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1743 ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
1744 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1745 %shl.y = shl i64 4096, %y
1746 %r = urem i64 %x, %shl.y
1750 define <2 x i64> @v_urem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1751 ; GISEL-LABEL: v_urem_v2i64_pow2_shl_denom:
1753 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1754 ; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000
1755 ; GISEL-NEXT: v_lshl_b64 v[7:8], s[4:5], v4
1756 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v6
1757 ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7
1758 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8
1759 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7
1760 ; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4
1761 ; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5
1762 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4
1763 ; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v8, vcc
1764 ; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
1765 ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11
1766 ; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
1767 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10
1768 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
1769 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10
1770 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
1771 ; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10
1772 ; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11
1773 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13
1774 ; GISEL-NEXT: v_trunc_f32_e32 v14, v14
1775 ; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13
1776 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
1777 ; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14
1778 ; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14
1779 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10
1780 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13
1781 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
1782 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14
1783 ; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11
1784 ; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11
1785 ; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11
1786 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
1787 ; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18
1788 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
1789 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17
1790 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
1791 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18
1792 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20
1793 ; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10
1794 ; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10
1795 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16
1796 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10
1797 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
1798 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19
1799 ; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16
1800 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
1801 ; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19
1802 ; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21
1803 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19
1804 ; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18
1805 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16
1806 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
1807 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17
1808 ; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18
1809 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16
1810 ; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20
1811 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17
1812 ; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20
1813 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
1814 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9]
1815 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
1816 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11]
1817 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15]
1818 ; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22
1819 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
1820 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5]
1821 ; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23
1822 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13]
1823 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17]
1824 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24
1825 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
1826 ; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22
1827 ; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19
1828 ; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16
1829 ; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17
1830 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18
1831 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1832 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18
1833 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1834 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19
1835 ; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10
1836 ; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10
1837 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
1838 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10
1839 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
1840 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
1841 ; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11
1842 ; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
1843 ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11
1844 ; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
1845 ; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20
1846 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13
1847 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6
1848 ; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19
1849 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14
1850 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
1851 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20
1852 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20
1853 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18
1854 ; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19
1855 ; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19
1856 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16
1857 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6
1858 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1859 ; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6
1860 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17
1861 ; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6
1862 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
1863 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9
1864 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
1865 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9
1866 ; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17
1867 ; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9
1868 ; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9
1869 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
1870 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1871 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15
1872 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
1873 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1874 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12
1875 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11]
1876 ; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18
1877 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1878 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19
1879 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
1880 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20
1881 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1882 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
1883 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
1884 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
1885 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19
1886 ; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17
1887 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1888 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1889 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1890 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17
1891 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18
1892 ; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19
1893 ; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10
1894 ; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10
1895 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10
1896 ; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11
1897 ; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11
1898 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11
1899 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12
1900 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
1901 ; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc
1902 ; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
1903 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6
1904 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6
1905 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6
1906 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
1907 ; GISEL-NEXT: v_mul_lo_u32 v15, v2, v9
1908 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9
1909 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
1910 ; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9
1911 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v9
1912 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v13, v10
1913 ; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v18, v15
1914 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v20, v11
1915 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1916 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1917 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
1918 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1919 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
1920 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v10, v14
1921 ; GISEL-NEXT: v_add_i32_e64 v13, s[6:7], v13, v19
1922 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v16
1923 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1924 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1925 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1926 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1927 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v15, v13
1928 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v14
1929 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v16
1930 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1931 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v13
1932 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v15
1933 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1934 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1935 ; GISEL-NEXT: v_mul_lo_u32 v16, v7, v10
1936 ; GISEL-NEXT: v_mul_lo_u32 v17, v8, v10
1937 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
1938 ; GISEL-NEXT: v_mul_lo_u32 v18, v4, v11
1939 ; GISEL-NEXT: v_mul_lo_u32 v19, v5, v11
1940 ; GISEL-NEXT: v_mul_hi_u32 v11, v4, v11
1941 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1942 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
1943 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16
1944 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v18
1945 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v13
1946 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v12
1947 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v0, v7
1948 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v2, v4
1949 ; GISEL-NEXT: v_sub_i32_e64 v12, s[10:11], v0, v7
1950 ; GISEL-NEXT: v_sub_i32_e64 v13, s[12:13], v2, v4
1951 ; GISEL-NEXT: v_mul_lo_u32 v6, v7, v6
1952 ; GISEL-NEXT: v_mul_lo_u32 v9, v4, v9
1953 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[6:7]
1954 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[8:9]
1955 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v7
1956 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[8:9], v13, v4
1957 ; GISEL-NEXT: v_sub_i32_e64 v7, s[14:15], v12, v7
1958 ; GISEL-NEXT: v_sub_i32_e64 v4, s[16:17], v13, v4
1959 ; GISEL-NEXT: v_add_i32_e64 v6, s[18:19], v17, v6
1960 ; GISEL-NEXT: v_add_i32_e64 v9, s[18:19], v19, v9
1961 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7]
1962 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[8:9]
1963 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v10
1964 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v11
1965 ; GISEL-NEXT: v_subb_u32_e64 v10, s[6:7], v1, v6, vcc
1966 ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v6
1967 ; GISEL-NEXT: v_subb_u32_e64 v6, s[6:7], v3, v9, s[4:5]
1968 ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v9
1969 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v8
1970 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
1971 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v6, v5
1972 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v5, s[4:5]
1973 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8
1974 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v6, v5
1975 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
1976 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
1977 ; GISEL-NEXT: v_subbrev_u32_e64 v18, vcc, 0, v1, s[10:11]
1978 ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v8, s[10:11]
1979 ; GISEL-NEXT: v_cndmask_b32_e64 v9, v9, v14, s[4:5]
1980 ; GISEL-NEXT: v_subbrev_u32_e64 v14, vcc, 0, v3, s[12:13]
1981 ; GISEL-NEXT: v_subb_u32_e64 v3, vcc, v3, v5, s[12:13]
1982 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v11, v15, s[8:9]
1983 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v18, v8
1984 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[14:15]
1985 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v14, v5
1986 ; GISEL-NEXT: v_subbrev_u32_e64 v3, s[6:7], 0, v3, s[16:17]
1987 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v18, v8
1988 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v14, v5
1989 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1990 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
1991 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
1992 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
1993 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v5, v16, s[6:7]
1994 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v8, v17, s[8:9]
1995 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5
1996 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v8
1997 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v12, v7, s[6:7]
1998 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v13, v4, s[8:9]
1999 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v18, v1, s[6:7]
2000 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v3, s[8:9]
2001 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
2002 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v4, s[4:5]
2003 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc
2004 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[4:5]
2005 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2007 ; CGP-LABEL: v_urem_v2i64_pow2_shl_denom:
2009 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2010 ; CGP-NEXT: v_mov_b32_e32 v8, v0
2011 ; CGP-NEXT: v_mov_b32_e32 v9, v1
2012 ; CGP-NEXT: v_mov_b32_e32 v5, v2
2013 ; CGP-NEXT: v_mov_b32_e32 v7, v3
2014 ; CGP-NEXT: s_mov_b64 s[4:5], 0x1000
2015 ; CGP-NEXT: v_mov_b32_e32 v10, 0x1000
2016 ; CGP-NEXT: v_mov_b32_e32 v11, 0
2017 ; CGP-NEXT: v_mov_b32_e32 v0, 0
2018 ; CGP-NEXT: v_lshl_b64 v[2:3], s[4:5], v4
2019 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3
2020 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
2021 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
2022 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
2023 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
2024 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
2025 ; CGP-NEXT: s_cbranch_execz .LBB8_2
2026 ; CGP-NEXT: ; %bb.1:
2027 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3
2028 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
2029 ; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc
2030 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v0
2031 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4
2032 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
2033 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0
2034 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2035 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4
2036 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2037 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
2038 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v4
2039 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v0
2040 ; CGP-NEXT: v_mul_lo_u32 v15, v12, v0
2041 ; CGP-NEXT: v_mul_hi_u32 v16, v1, v0
2042 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
2043 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v14
2044 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v14
2045 ; CGP-NEXT: v_mul_hi_u32 v14, v4, v14
2046 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
2047 ; CGP-NEXT: v_mul_lo_u32 v16, v0, v13
2048 ; CGP-NEXT: v_mul_lo_u32 v18, v4, v13
2049 ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13
2050 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13
2051 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
2052 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2053 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14
2054 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2055 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
2056 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2057 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19
2058 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2059 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
2060 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17
2061 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
2062 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2063 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
2064 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
2065 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
2066 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc
2067 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v0
2068 ; CGP-NEXT: v_mul_lo_u32 v12, v12, v0
2069 ; CGP-NEXT: v_mul_hi_u32 v14, v1, v0
2070 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v4
2071 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v13
2072 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
2073 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13
2074 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
2075 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14
2076 ; CGP-NEXT: v_mul_lo_u32 v12, v0, v1
2077 ; CGP-NEXT: v_mul_lo_u32 v14, v4, v1
2078 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1
2079 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1
2080 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
2081 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2082 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
2083 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2084 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
2085 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2086 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
2087 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2088 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
2089 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
2090 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
2091 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2092 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
2093 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
2094 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
2095 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
2096 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0
2097 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v0
2098 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
2099 ; CGP-NEXT: v_mul_lo_u32 v13, v8, v1
2100 ; CGP-NEXT: v_mul_lo_u32 v14, v9, v1
2101 ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1
2102 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1
2103 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
2104 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2105 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0
2106 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2107 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
2108 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2109 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15
2110 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2111 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
2112 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
2113 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
2114 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2115 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4
2116 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v0
2117 ; CGP-NEXT: v_mul_lo_u32 v13, v3, v0
2118 ; CGP-NEXT: v_mul_hi_u32 v0, v2, v0
2119 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
2120 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1
2121 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v13, v1
2122 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v1, v0
2123 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v12
2124 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v9, v0, vcc
2125 ; CGP-NEXT: v_sub_i32_e64 v0, s[4:5], v9, v0
2126 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v2
2127 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2128 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3
2129 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
2130 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
2131 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
2132 ; CGP-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
2133 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v1, v2
2134 ; CGP-NEXT: v_subbrev_u32_e64 v12, s[4:5], 0, v0, vcc
2135 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v9, v2
2136 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2137 ; CGP-NEXT: v_subb_u32_e32 v0, vcc, v0, v3, vcc
2138 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v12, v3
2139 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
2140 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v9, v2
2141 ; CGP-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
2142 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3
2143 ; CGP-NEXT: v_cndmask_b32_e32 v3, v14, v13, vcc
2144 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
2145 ; CGP-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc
2146 ; CGP-NEXT: v_cndmask_b32_e32 v3, v12, v0, vcc
2147 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
2148 ; CGP-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
2149 ; CGP-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
2150 ; CGP-NEXT: ; implicit-def: $vgpr4
2151 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
2152 ; CGP-NEXT: ; implicit-def: $vgpr8
2153 ; CGP-NEXT: .LBB8_2: ; %Flow1
2154 ; CGP-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
2155 ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6
2156 ; CGP-NEXT: s_xor_b64 exec, exec, s[4:5]
2157 ; CGP-NEXT: s_cbranch_execz .LBB8_4
2158 ; CGP-NEXT: ; %bb.3:
2159 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4
2160 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
2161 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
2162 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
2163 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
2164 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
2165 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
2166 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
2167 ; CGP-NEXT: v_mul_lo_u32 v0, v0, v2
2168 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v8, v0
2169 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
2170 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
2171 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2172 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v0, v2
2173 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
2174 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
2175 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2176 ; CGP-NEXT: .LBB8_4:
2177 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
2178 ; CGP-NEXT: v_or_b32_e32 v3, v7, v10
2179 ; CGP-NEXT: v_mov_b32_e32 v2, 0
2180 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2181 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9
2182 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
2183 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
2184 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
2185 ; CGP-NEXT: s_cbranch_execnz .LBB8_7
2186 ; CGP-NEXT: ; %bb.5: ; %Flow
2187 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
2188 ; CGP-NEXT: s_cbranch_execnz .LBB8_8
2189 ; CGP-NEXT: .LBB8_6:
2190 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
2191 ; CGP-NEXT: s_setpc_b64 s[30:31]
2192 ; CGP-NEXT: .LBB8_7:
2193 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
2194 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
2195 ; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v10, vcc
2196 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2
2197 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
2198 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
2199 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2
2200 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2201 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
2202 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2203 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
2204 ; CGP-NEXT: v_mul_lo_u32 v8, v3, v4
2205 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v2
2206 ; CGP-NEXT: v_mul_lo_u32 v12, v6, v2
2207 ; CGP-NEXT: v_mul_hi_u32 v13, v3, v2
2208 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8
2209 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v11
2210 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
2211 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v11
2212 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
2213 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v8
2214 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v8
2215 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8
2216 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8
2217 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
2218 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2219 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
2220 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2221 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
2222 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2223 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
2224 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2225 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
2226 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
2227 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
2228 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2229 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
2230 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
2231 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
2232 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
2233 ; CGP-NEXT: v_mul_lo_u32 v8, v3, v2
2234 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
2235 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v2
2236 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
2237 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v8
2238 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v8
2239 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8
2240 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3
2241 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
2242 ; CGP-NEXT: v_mul_lo_u32 v6, v2, v3
2243 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v3
2244 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3
2245 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3
2246 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2247 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2248 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
2249 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2250 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
2251 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2252 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
2253 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2254 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2255 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
2256 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
2257 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2258 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
2259 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
2260 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
2261 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc
2262 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v2
2263 ; CGP-NEXT: v_mul_hi_u32 v6, v5, v2
2264 ; CGP-NEXT: v_mul_hi_u32 v2, v7, v2
2265 ; CGP-NEXT: v_mul_lo_u32 v8, v5, v3
2266 ; CGP-NEXT: v_mul_lo_u32 v11, v7, v3
2267 ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3
2268 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3
2269 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
2270 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2271 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
2272 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2273 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
2274 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2275 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
2276 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2277 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
2278 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
2279 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
2280 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
2281 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
2282 ; CGP-NEXT: v_mul_lo_u32 v6, v9, v2
2283 ; CGP-NEXT: v_mul_lo_u32 v8, v10, v2
2284 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
2285 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
2286 ; CGP-NEXT: v_mul_lo_u32 v3, v9, v3
2287 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v8, v3
2288 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
2289 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v6
2290 ; CGP-NEXT: v_subb_u32_e64 v4, s[4:5], v7, v2, vcc
2291 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v7, v2
2292 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v9
2293 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[4:5]
2294 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v10
2295 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
2296 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc
2297 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10
2298 ; CGP-NEXT: v_cndmask_b32_e32 v5, v6, v5, vcc
2299 ; CGP-NEXT: v_sub_i32_e32 v6, vcc, v3, v9
2300 ; CGP-NEXT: v_subbrev_u32_e64 v7, s[4:5], 0, v2, vcc
2301 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9
2302 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
2303 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v10, vcc
2304 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v7, v10
2305 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
2306 ; CGP-NEXT: v_sub_i32_e32 v9, vcc, v6, v9
2307 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
2308 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v7, v10
2309 ; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v8, vcc
2310 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
2311 ; CGP-NEXT: v_cndmask_b32_e32 v6, v6, v9, vcc
2312 ; CGP-NEXT: v_cndmask_b32_e32 v7, v7, v2, vcc
2313 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
2314 ; CGP-NEXT: v_cndmask_b32_e32 v2, v3, v6, vcc
2315 ; CGP-NEXT: v_cndmask_b32_e32 v3, v4, v7, vcc
2316 ; CGP-NEXT: ; implicit-def: $vgpr4
2317 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
2318 ; CGP-NEXT: ; implicit-def: $vgpr5
2319 ; CGP-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7]
2320 ; CGP-NEXT: s_cbranch_execz .LBB8_6
2321 ; CGP-NEXT: .LBB8_8:
2322 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
2323 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
2324 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2325 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
2326 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
2327 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
2328 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2329 ; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
2330 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v9
2331 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, v5, v2
2332 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9
2333 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9
2334 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2335 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v2, v9
2336 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v9
2337 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2338 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2339 ; CGP-NEXT: s_or_b64 exec, exec, s[4:5]
2340 ; CGP-NEXT: s_setpc_b64 s[30:31]
2341 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
2342 %r = urem <2 x i64> %x, %shl.y
; Test: 64-bit unsigned remainder where both operands are first masked with
; 16777215 (0xffffff), i.e. only their low 24 bits are kept.
;
; The two check prefixes below correspond to the two RUN lines at the top of
; the file, which differ only in -amdgpu-codegenprepare-disable-idiv-expansion:
;   * GISEL prefix (expansion in AMDGPUCodeGenPrepare disabled): the expected
;     code is an integer sequence built around v_rcp_iflag_f32, the constant
;     0x4f7ffffe, v_mul_hi_u32 and two compare/select correction steps.
;   * CGP prefix (expansion in AMDGPUCodeGenPrepare enabled): the expected
;     code is a shorter float sequence (v_rcp_f32, v_trunc_f32, v_mad_f32)
;     whose result is masked with 0xffffff again.
; In both variants v1 is written with 0 before returning; presumably v1 holds
; the high 32 bits of the i64 result (NOTE(review): confirm against the
; calling convention).
;
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script instead of editing by hand.
2346 define i64 @v_urem_i64_24bit(i64 %num, i64 %den) {
2347 ; GISEL-LABEL: v_urem_i64_24bit:
2349 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2350 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2351 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v2
2352 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
2353 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
2354 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
2355 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
2356 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
2357 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
2358 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
2359 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2360 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
2361 ; GISEL-NEXT: v_mul_lo_u32 v2, v2, v1
2362 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
2363 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
2364 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
2365 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2366 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
2367 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
2368 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2369 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
2370 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2372 ; CGP-LABEL: v_urem_i64_24bit:
2374 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2375 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2376 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v2
2377 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v0
2378 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v1
2379 ; CGP-NEXT: v_rcp_f32_e32 v4, v3
2380 ; CGP-NEXT: v_mul_f32_e32 v4, v2, v4
2381 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2382 ; CGP-NEXT: v_mad_f32 v2, -v4, v3, v2
2383 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2384 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, v3
2385 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[4:5]
2386 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v4, v2
2387 ; CGP-NEXT: v_mul_lo_u32 v1, v2, v1
2388 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
2389 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2390 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2391 ; CGP-NEXT: s_setpc_b64 s[30:31]
; IR under test: keep the low 24 bits of each operand (16777215 == 0xffffff),
; then take the unsigned remainder of the masked values.
2392 %num.mask = and i64 %num, 16777215
2393 %den.mask = and i64 %den, 16777215
2394 %result = urem i64 %num.mask, %den.mask
2398 define <2 x i64> @v_urem_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
2399 ; GISEL-LABEL: v_urem_v2i64_24bit:
2401 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2402 ; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v0
2403 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
2404 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4
2405 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6
2406 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
2407 ; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1
2408 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
2409 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
2410 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0
2411 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0
2412 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
2413 ; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6
2414 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6
2415 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
2416 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
2417 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
2418 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
2419 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
2420 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
2421 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
2422 ; GISEL-NEXT: v_trunc_f32_e32 v11, v11
2423 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
2424 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
2425 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
2426 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
2427 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
2428 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
2429 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
2430 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
2431 ; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
2432 ; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
2433 ; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
2434 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
2435 ; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
2436 ; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
2437 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
2438 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
2439 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14
2440 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
2441 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
2442 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17
2443 ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17
2444 ; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
2445 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
2446 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
2447 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
2448 ; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12
2449 ; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12
2450 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2451 ; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13
2452 ; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13
2453 ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13
2454 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2455 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
2456 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2457 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
2458 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
2459 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23
2460 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
2461 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17
2462 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
2463 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
2464 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2465 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
2466 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2467 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21
2468 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2469 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25
2470 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
2471 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
2472 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20
2473 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18
2474 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21
2475 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
2476 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2477 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
2478 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2479 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
2480 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18
2481 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
2482 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
2483 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
2484 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
2485 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
2486 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
2487 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
2488 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
2489 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
2490 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
2491 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
2492 ; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7
2493 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
2494 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
2495 ; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12
2496 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2497 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
2498 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13
2499 ; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
2500 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2501 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
2502 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9
2503 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
2504 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
2505 ; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4
2506 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
2507 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4
2508 ; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
2509 ; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5
2510 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5
2511 ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5
2512 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
2513 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
2514 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2515 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
2516 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2517 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
2518 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2519 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13
2520 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2521 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
2522 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
2523 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
2524 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2525 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
2526 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2527 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21
2528 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2529 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
2530 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
2531 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15
2532 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17
2533 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
2534 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2535 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
2536 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2537 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
2538 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14
2539 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
2540 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
2541 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
2542 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
2543 ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6
2544 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6
2545 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
2546 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
2547 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc
2548 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
2549 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
2550 ; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
2551 ; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4
2552 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
2553 ; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4
2554 ; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
2555 ; GISEL-NEXT: v_mul_lo_u32 v15, v2, v5
2556 ; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
2557 ; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5
2558 ; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
2559 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
2560 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2561 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6
2562 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
2563 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
2564 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2565 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
2566 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2567 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
2568 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2569 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
2570 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
2571 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2572 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2573 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
2574 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2575 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
2576 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v13, v9
2577 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v15, v10
2578 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v16, v11
2579 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
2580 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2581 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
2582 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2583 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
2584 ; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
2585 ; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6
2586 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
2587 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
2588 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7
2589 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v7
2590 ; GISEL-NEXT: v_mul_hi_u32 v7, v0, v7
2591 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
2592 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
2593 ; GISEL-NEXT: v_mul_lo_u32 v4, v1, v4
2594 ; GISEL-NEXT: v_mul_lo_u32 v5, v0, v5
2595 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v12, v4
2596 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v13, v5
2597 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
2598 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v7
2599 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9
2600 ; GISEL-NEXT: v_subb_u32_e64 v6, s[4:5], 0, v4, vcc
2601 ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], 0, v4
2602 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1
2603 ; GISEL-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
2604 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11
2605 ; GISEL-NEXT: v_subb_u32_e64 v8, s[6:7], 0, v5, s[4:5]
2606 ; GISEL-NEXT: v_sub_i32_e64 v5, s[6:7], 0, v5
2607 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0
2608 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7]
2609 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v6
2610 ; GISEL-NEXT: v_cndmask_b32_e64 v7, -1, v7, s[6:7]
2611 ; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
2612 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
2613 ; GISEL-NEXT: v_cndmask_b32_e32 v9, -1, v9, vcc
2614 ; GISEL-NEXT: v_subbrev_u32_e64 v5, vcc, 0, v5, s[4:5]
2615 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, v3, v1
2616 ; GISEL-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
2617 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v10, v1
2618 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
2619 ; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v0
2620 ; GISEL-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
2621 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v12, v0
2622 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
2623 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v4
2624 ; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v11, vcc
2625 ; GISEL-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
2626 ; GISEL-NEXT: v_subbrev_u32_e32 v14, vcc, 0, v4, vcc
2627 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
2628 ; GISEL-NEXT: v_cndmask_b32_e32 v13, -1, v13, vcc
2629 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v12, v0
2630 ; GISEL-NEXT: v_subbrev_u32_e32 v15, vcc, 0, v5, vcc
2631 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11
2632 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v1, vcc
2633 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v13
2634 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, v0, s[4:5]
2635 ; GISEL-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
2636 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
2637 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
2638 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v15, s[4:5]
2639 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v9
2640 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[4:5]
2641 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v6, v4, vcc
2642 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v8, v3, s[4:5]
2643 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2645 ; CGP-LABEL: v_urem_v2i64_24bit:
2647 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2648 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2649 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v2
2650 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v4
2651 ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v6
2652 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v0
2653 ; CGP-NEXT: v_cvt_f32_u32_e32 v5, v2
2654 ; CGP-NEXT: v_cvt_f32_u32_e32 v6, v1
2655 ; CGP-NEXT: v_cvt_f32_u32_e32 v7, v3
2656 ; CGP-NEXT: v_rcp_f32_e32 v8, v5
2657 ; CGP-NEXT: v_rcp_f32_e32 v9, v7
2658 ; CGP-NEXT: v_mul_f32_e32 v8, v4, v8
2659 ; CGP-NEXT: v_mul_f32_e32 v9, v6, v9
2660 ; CGP-NEXT: v_trunc_f32_e32 v8, v8
2661 ; CGP-NEXT: v_trunc_f32_e32 v9, v9
2662 ; CGP-NEXT: v_mad_f32 v4, -v8, v5, v4
2663 ; CGP-NEXT: v_cvt_u32_f32_e32 v8, v8
2664 ; CGP-NEXT: v_mad_f32 v6, -v9, v7, v6
2665 ; CGP-NEXT: v_cvt_u32_f32_e32 v9, v9
2666 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v4|, v5
2667 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, s[4:5]
2668 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v6|, v7
2669 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, s[4:5]
2670 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
2671 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v9, v5
2672 ; CGP-NEXT: v_mul_lo_u32 v2, v4, v2
2673 ; CGP-NEXT: v_mul_lo_u32 v3, v5, v3
2674 ; CGP-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
2675 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v1, v3
2676 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2677 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v1
2678 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2679 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2680 ; CGP-NEXT: s_setpc_b64 s[30:31]
2681 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2682 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2683 %result = urem <2 x i64> %num.mask, %den.mask
2684 ret <2 x i64> %result