1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Unsigned 64-bit division with both operands arriving in VGPRs.
; Per the assertions below, the emitted code ORs the high halves of %num and
; %den and compares the result against zero. When it is nonzero the full
; 64-bit expansion runs (%bb.1); otherwise a 32-bit divide on the low halves is
; used and the high result register is set to 0 (%bb.3).
; Both llc invocations share the common check prefix for this function.
7 define i64 @v_udiv_i64(i64 %num, i64 %den) {
8 ; CHECK-LABEL: v_udiv_i64:
10 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; CHECK-NEXT: v_mov_b32_e32 v4, v0
12 ; CHECK-NEXT: v_mov_b32_e32 v5, v1
13 ; CHECK-NEXT: v_or_b32_e32 v1, v5, v3
14 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
15 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
17 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
18 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
19 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
20 ; CHECK-NEXT: ; %bb.1:
21 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
22 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v3
23 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
24 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
25 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
26 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
27 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
28 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
29 ; CHECK-NEXT: v_trunc_f32_e32 v1, v1
30 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
31 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
32 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
33 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v1
34 ; CHECK-NEXT: v_mul_lo_u32 v9, v6, v0
35 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
36 ; CHECK-NEXT: v_mul_hi_u32 v11, v6, v0
37 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
38 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v9
39 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
40 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v9
41 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
42 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
43 ; CHECK-NEXT: v_mul_lo_u32 v13, v1, v8
44 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
45 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
46 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
47 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
48 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
49 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
50 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
51 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
52 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
53 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
54 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
55 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
56 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
57 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
58 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
59 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
60 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
61 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
62 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
63 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
64 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v0
65 ; CHECK-NEXT: v_mul_lo_u32 v6, v6, v1
66 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v8
67 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
68 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
69 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
70 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
71 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v6
72 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v6
73 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v6
74 ; CHECK-NEXT: v_mul_hi_u32 v6, v1, v6
75 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
76 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
77 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
78 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
79 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
80 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
81 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
82 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
83 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
84 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
85 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
86 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
87 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
88 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
89 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
90 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v6, vcc
91 ; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0
92 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
93 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
94 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1
95 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1
96 ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1
97 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
98 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
99 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
100 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
101 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
102 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
103 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
104 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
105 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
106 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
107 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
108 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
109 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
110 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
111 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0
112 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0
113 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0
114 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
115 ; CHECK-NEXT: v_mul_lo_u32 v6, v2, v1
116 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0
117 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc
118 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
119 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
120 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
121 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
122 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
123 ; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v5, v6, vcc
124 ; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v6
125 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2
126 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
127 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3
128 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
129 ; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc
130 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3
131 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc
132 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v2
133 ; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
134 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
135 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
136 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3
137 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
138 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3
139 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
140 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
141 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc
142 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc
143 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
144 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
145 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
146 ; CHECK-NEXT: ; implicit-def: $vgpr2
147 ; CHECK-NEXT: ; implicit-def: $vgpr4
148 ; CHECK-NEXT: .LBB0_2: ; %Flow
149 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
150 ; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
151 ; CHECK-NEXT: s_cbranch_execz .LBB0_4
152 ; CHECK-NEXT: ; %bb.3:
153 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v2
154 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
155 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
156 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
157 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
158 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
159 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
160 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
161 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
162 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
163 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
164 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
165 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
166 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
167 ; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
168 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
169 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
170 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
171 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
172 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
173 ; CHECK-NEXT: .LBB0_4:
174 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
175 ; CHECK-NEXT: s_setpc_b64 s[30:31]
176 %result = udiv i64 %num, %den
180 ; FIXME: The readfirstlane calls are a workaround for the uniform VGPR case not being handled.
181 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Unsigned 64-bit division with both operands passed inreg to an amdgpu_ps
; function. The two 32-bit halves of the quotient are passed through
; llvm.amdgcn.readfirstlane and reassembled into the i64 return value.
; Per the assertions below, the emitted code masks (num | den) down to its
; high 32 bits and branches on whether that is nonzero: the 64-bit expansion
; (%bb.1) or a 32-bit divide on the low halves (%bb.4).
183 define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
184 ; CHECK-LABEL: s_udiv_i64:
186 ; CHECK-NEXT: s_or_b64 s[6:7], s[0:1], s[2:3]
187 ; CHECK-NEXT: s_mov_b32 s4, 0
188 ; CHECK-NEXT: s_mov_b32 s5, -1
189 ; CHECK-NEXT: s_and_b64 s[6:7], s[6:7], s[4:5]
190 ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[6:7], 0
191 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2
192 ; CHECK-NEXT: ; %bb.1:
193 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
194 ; CHECK-NEXT: v_mov_b32_e32 v1, s3
195 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s3
196 ; CHECK-NEXT: s_sub_u32 s4, 0, s2
197 ; CHECK-NEXT: s_cselect_b32 s5, 1, 0
198 ; CHECK-NEXT: v_mov_b32_e32 v3, s1
199 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v2
200 ; CHECK-NEXT: s_and_b32 s5, s5, 1
201 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
202 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
203 ; CHECK-NEXT: s_cmp_lg_u32 s5, 0
204 ; CHECK-NEXT: s_subb_u32 s5, 0, s3
205 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
206 ; CHECK-NEXT: v_trunc_f32_e32 v2, v2
207 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
208 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
209 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
210 ; CHECK-NEXT: v_mul_lo_u32 v4, s4, v2
211 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v0
212 ; CHECK-NEXT: v_mul_lo_u32 v6, s5, v0
213 ; CHECK-NEXT: v_mul_hi_u32 v7, s4, v0
214 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
215 ; CHECK-NEXT: v_mul_lo_u32 v6, v2, v5
216 ; CHECK-NEXT: v_mul_hi_u32 v8, v0, v5
217 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5
218 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
219 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v4
220 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v4
221 ; CHECK-NEXT: v_mul_hi_u32 v10, v0, v4
222 ; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4
223 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
224 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
225 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v9, v5
226 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
227 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
228 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
229 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v10
230 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
231 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
232 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v8
233 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
234 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
235 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
236 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
237 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
238 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc
239 ; CHECK-NEXT: v_mul_lo_u32 v4, s4, v0
240 ; CHECK-NEXT: v_mul_lo_u32 v5, s5, v0
241 ; CHECK-NEXT: v_mul_hi_u32 v6, s4, v0
242 ; CHECK-NEXT: v_mul_lo_u32 v7, s4, v2
243 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v4
244 ; CHECK-NEXT: v_mul_hi_u32 v9, v0, v4
245 ; CHECK-NEXT: v_mul_hi_u32 v4, v2, v4
246 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
247 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
248 ; CHECK-NEXT: v_mul_lo_u32 v6, v0, v5
249 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v5
250 ; CHECK-NEXT: v_mul_hi_u32 v10, v0, v5
251 ; CHECK-NEXT: v_mul_hi_u32 v5, v2, v5
252 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
253 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
254 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4
255 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
256 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
257 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
258 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v10
259 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
260 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
261 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
262 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
263 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
264 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
265 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
266 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
267 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v5, vcc
268 ; CHECK-NEXT: v_mul_lo_u32 v4, s1, v0
269 ; CHECK-NEXT: v_mul_hi_u32 v5, s0, v0
270 ; CHECK-NEXT: v_mul_hi_u32 v0, s1, v0
271 ; CHECK-NEXT: v_mul_lo_u32 v6, s0, v2
272 ; CHECK-NEXT: v_mul_lo_u32 v7, s1, v2
273 ; CHECK-NEXT: v_mul_hi_u32 v8, s0, v2
274 ; CHECK-NEXT: v_mul_hi_u32 v2, s1, v2
275 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
276 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
277 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
278 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
279 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
280 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
281 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v8
282 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
283 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v6, v4
284 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
285 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
286 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
287 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
288 ; CHECK-NEXT: v_mul_lo_u32 v5, s2, v0
289 ; CHECK-NEXT: v_mul_lo_u32 v6, s3, v0
290 ; CHECK-NEXT: v_mul_hi_u32 v7, s2, v0
291 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v0
292 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
293 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v8
294 ; CHECK-NEXT: v_mul_lo_u32 v2, s2, v2
295 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v6, v2
296 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
297 ; CHECK-NEXT: v_sub_i32_e32 v5, vcc, s0, v5
298 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v2, vcc
299 ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], s1, v2
300 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v5
301 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
302 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3
303 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
304 ; CHECK-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc
305 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3
306 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v7, v6, vcc
307 ; CHECK-NEXT: v_subrev_i32_e32 v3, vcc, s2, v5
308 ; CHECK-NEXT: v_subbrev_u32_e32 v1, vcc, 0, v1, vcc
309 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v3
310 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
311 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v1
312 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
313 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v1
314 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc
315 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
316 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v8, v4, vcc
317 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
318 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc
319 ; CHECK-NEXT: s_mov_b32 s5, 0
320 ; CHECK-NEXT: s_branch .LBB1_3
321 ; CHECK-NEXT: .LBB1_2:
322 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
323 ; CHECK-NEXT: .LBB1_3: ; %Flow
324 ; CHECK-NEXT: s_xor_b32 s1, s5, -1
325 ; CHECK-NEXT: s_and_b32 s1, s1, 1
326 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
327 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
328 ; CHECK-NEXT: ; %bb.4:
329 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, s2
330 ; CHECK-NEXT: s_sub_i32 s1, 0, s2
331 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
332 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
333 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
334 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
335 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
336 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
337 ; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
338 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2
339 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
340 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
341 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
342 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
343 ; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1
344 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
345 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
346 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
347 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
348 ; CHECK-NEXT: .LBB1_5:
349 ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
350 ; CHECK-NEXT: s_mov_b32 s1, s0
351 ; CHECK-NEXT: ; return to shader part epilog
352 %result = udiv i64 %num, %den
; Split the quotient into 32-bit halves so each can go through readfirstlane.
353 %cast = bitcast i64 %result to <2 x i32>
354 %elt.0 = extractelement <2 x i32> %cast, i32 0
355 %elt.1 = extractelement <2 x i32> %cast, i32 1
356 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
357 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
358 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
; NOTE(review): the lane-1 insert below uses %res.0, not %res.1, so %res.1 is
; unused and the returned high half duplicates the low half (the assertions
; above end with "s_mov_b32 s1, s0"). This looks like a copy-paste slip; confirm
; intent. Switching to %res.1 would require regenerating the assertions.
359 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
360 %cast.back = bitcast <2 x i32> %ins.1 to i64
364 define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
365 ; GISEL-LABEL: v_udiv_v2i64:
367 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v4
369 ; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v5
370 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v9
371 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v8, v8
372 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x5f7ffffc, v8
373 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v8
374 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9
375 ; GISEL-NEXT: v_mac_f32_e32 v8, 0xcf800000, v9
376 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
377 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
378 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v4
379 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v5, vcc
380 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8
381 ; GISEL-NEXT: v_mul_lo_u32 v13, v11, v8
382 ; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9
383 ; GISEL-NEXT: v_mul_hi_u32 v15, v10, v8
384 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
385 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
386 ; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12
387 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v13
388 ; GISEL-NEXT: v_mul_hi_u32 v16, v8, v12
389 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
390 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
391 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
392 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
393 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
394 ; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13
395 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
396 ; GISEL-NEXT: v_mul_hi_u32 v16, v8, v13
397 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
398 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
399 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
400 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
401 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
402 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
403 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
404 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
405 ; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
406 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
407 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
408 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
409 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v8
410 ; GISEL-NEXT: v_mul_lo_u32 v11, v11, v8
411 ; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9
412 ; GISEL-NEXT: v_mul_hi_u32 v10, v10, v8
413 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
414 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
415 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12
416 ; GISEL-NEXT: v_mul_lo_u32 v13, v8, v10
417 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v12
418 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
419 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
420 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
421 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
422 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
423 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10
424 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
425 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v10
426 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
427 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
428 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
429 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
430 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
431 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
432 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
433 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
434 ; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10
435 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
436 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
437 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc
438 ; GISEL-NEXT: v_mul_lo_u32 v10, v1, v8
439 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9
440 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v8
441 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
442 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
443 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
444 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
445 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
446 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
447 ; GISEL-NEXT: v_mul_hi_u32 v8, v1, v8
448 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9
449 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
450 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
451 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
452 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
453 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
454 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
455 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
456 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
457 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
458 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
459 ; GISEL-NEXT: v_mul_lo_u32 v10, v4, v8
460 ; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8
461 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v9
462 ; GISEL-NEXT: v_mul_hi_u32 v13, v4, v8
463 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
464 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
465 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
466 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
467 ; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11
468 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5
469 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
470 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v4
471 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
472 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v5
473 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5]
474 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v4
475 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v5, vcc
476 ; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
477 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v8
478 ; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc
479 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
480 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
481 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v4
482 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
483 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v5
484 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc
485 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11
486 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v12, vcc
487 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
488 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc
489 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v4, vcc
490 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
491 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v0, vcc
492 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
493 ; GISEL-NEXT: v_cvt_f32_u32_e32 v4, v6
494 ; GISEL-NEXT: v_cvt_f32_u32_e32 v5, v7
495 ; GISEL-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
496 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v4, v4
497 ; GISEL-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
498 ; GISEL-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
499 ; GISEL-NEXT: v_trunc_f32_e32 v5, v5
500 ; GISEL-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
501 ; GISEL-NEXT: v_cvt_u32_f32_e32 v4, v4
502 ; GISEL-NEXT: v_cvt_u32_f32_e32 v5, v5
503 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v6
504 ; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v7, vcc
505 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
506 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v4
507 ; GISEL-NEXT: v_mul_lo_u32 v12, v8, v5
508 ; GISEL-NEXT: v_mul_hi_u32 v13, v8, v4
509 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
510 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
511 ; GISEL-NEXT: v_mul_lo_u32 v12, v5, v10
512 ; GISEL-NEXT: v_mul_lo_u32 v13, v4, v11
513 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v10
514 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
515 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
516 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
517 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
518 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
519 ; GISEL-NEXT: v_mul_lo_u32 v13, v5, v11
520 ; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10
521 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v11
522 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
523 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
524 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
525 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
526 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
527 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
528 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
529 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
530 ; GISEL-NEXT: v_mul_hi_u32 v11, v5, v11
531 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
532 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
533 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v11, vcc
534 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
535 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v4
536 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v5
537 ; GISEL-NEXT: v_mul_hi_u32 v8, v8, v4
538 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
539 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
540 ; GISEL-NEXT: v_mul_lo_u32 v9, v5, v10
541 ; GISEL-NEXT: v_mul_lo_u32 v11, v4, v8
542 ; GISEL-NEXT: v_mul_hi_u32 v12, v4, v10
543 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
544 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
545 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
546 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
547 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
548 ; GISEL-NEXT: v_mul_lo_u32 v11, v5, v8
549 ; GISEL-NEXT: v_mul_hi_u32 v10, v5, v10
550 ; GISEL-NEXT: v_mul_hi_u32 v12, v4, v8
551 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
552 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
553 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
554 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
555 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
556 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
557 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
558 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
559 ; GISEL-NEXT: v_mul_hi_u32 v8, v5, v8
560 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
561 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v9
562 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v5, v8, vcc
563 ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v4
564 ; GISEL-NEXT: v_mul_lo_u32 v9, v2, v5
565 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v4
566 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
567 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
568 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
569 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
570 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
571 ; GISEL-NEXT: v_mul_lo_u32 v9, v3, v5
572 ; GISEL-NEXT: v_mul_hi_u32 v4, v3, v4
573 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v5
574 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v9, v4
575 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
576 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
577 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
578 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
579 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
580 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
581 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
582 ; GISEL-NEXT: v_mul_hi_u32 v5, v3, v5
583 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v8
584 ; GISEL-NEXT: v_mul_lo_u32 v8, v6, v4
585 ; GISEL-NEXT: v_mul_lo_u32 v9, v7, v4
586 ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v5
587 ; GISEL-NEXT: v_mul_hi_u32 v11, v6, v4
588 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
589 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
590 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
591 ; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
592 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
593 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7
594 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
595 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v6
596 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
597 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v7
598 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5]
599 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v6
600 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v7, vcc
601 ; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
602 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4
603 ; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v5, vcc
604 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v7
605 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
606 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
607 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
608 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v7
609 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc
610 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9
611 ; GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v10, vcc
612 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
613 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
614 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v6, vcc
615 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
616 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
617 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc
618 ; GISEL-NEXT: s_setpc_b64 s[30:31]
620 ; CGP-LABEL: v_udiv_v2i64:
622 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623 ; CGP-NEXT: v_mov_b32_e32 v10, v0
624 ; CGP-NEXT: v_mov_b32_e32 v11, v1
625 ; CGP-NEXT: v_mov_b32_e32 v8, v2
626 ; CGP-NEXT: v_mov_b32_e32 v9, v3
627 ; CGP-NEXT: v_or_b32_e32 v1, v11, v5
628 ; CGP-NEXT: v_mov_b32_e32 v0, 0
629 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
630 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
631 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
632 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
633 ; CGP-NEXT: s_cbranch_execz .LBB2_2
635 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
636 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v5
637 ; CGP-NEXT: v_sub_i32_e32 v2, vcc, 0, v4
638 ; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc
639 ; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
640 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
641 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
642 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
643 ; CGP-NEXT: v_trunc_f32_e32 v1, v1
644 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
645 ; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1
646 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
647 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v1
648 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v0
649 ; CGP-NEXT: v_mul_lo_u32 v14, v3, v0
650 ; CGP-NEXT: v_mul_hi_u32 v15, v2, v0
651 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
652 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v13
653 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
654 ; CGP-NEXT: v_mul_hi_u32 v13, v1, v13
655 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
656 ; CGP-NEXT: v_mul_lo_u32 v15, v0, v12
657 ; CGP-NEXT: v_mul_lo_u32 v17, v1, v12
658 ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12
659 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
660 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
661 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
662 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
663 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
664 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
665 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
666 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18
667 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
668 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
669 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
670 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
671 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
672 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
673 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
674 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
675 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v12, vcc
676 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v0
677 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v0
678 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v0
679 ; CGP-NEXT: v_mul_lo_u32 v2, v2, v1
680 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v12
681 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
682 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
683 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
684 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13
685 ; CGP-NEXT: v_mul_lo_u32 v3, v0, v2
686 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v2
687 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v2
688 ; CGP-NEXT: v_mul_hi_u32 v2, v1, v2
689 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
690 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
691 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
692 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
693 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15
694 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
695 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
696 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
697 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
698 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
699 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3
700 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
701 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
702 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
703 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3
704 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
705 ; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
706 ; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
707 ; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
708 ; CGP-NEXT: v_mul_lo_u32 v12, v10, v1
709 ; CGP-NEXT: v_mul_lo_u32 v13, v11, v1
710 ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1
711 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1
712 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
713 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
714 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0
715 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
716 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
717 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
718 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
719 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
720 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
721 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3
722 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
723 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
724 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
725 ; CGP-NEXT: v_mul_lo_u32 v3, v4, v0
726 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v0
727 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v0
728 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2
729 ; CGP-NEXT: v_mul_lo_u32 v2, v4, v1
730 ; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0
731 ; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc
732 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
733 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14
734 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc
735 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13
736 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3
737 ; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v2, vcc
738 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2
739 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4
740 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
741 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5
742 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
743 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc
744 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5
745 ; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v11, vcc
746 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
747 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
748 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
749 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
750 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
751 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
752 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5
753 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc
754 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
755 ; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc
756 ; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc
757 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
758 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
759 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
760 ; CGP-NEXT: ; implicit-def: $vgpr4
761 ; CGP-NEXT: ; implicit-def: $vgpr10
762 ; CGP-NEXT: .LBB2_2: ; %Flow2
763 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
764 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
765 ; CGP-NEXT: s_cbranch_execz .LBB2_4
767 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v4
768 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
769 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
770 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
771 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
772 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
773 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
774 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
775 ; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
776 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
777 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
778 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
779 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
780 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
781 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4
782 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
783 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
784 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
785 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
786 ; CGP-NEXT: v_mov_b32_e32 v1, 0
788 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
789 ; CGP-NEXT: v_or_b32_e32 v3, v9, v7
790 ; CGP-NEXT: v_mov_b32_e32 v2, 0
791 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
792 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
793 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
794 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
795 ; CGP-NEXT: s_cbranch_execz .LBB2_6
797 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
798 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v7
799 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v6
800 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc
801 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
802 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
803 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
804 ; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
805 ; CGP-NEXT: v_trunc_f32_e32 v3, v3
806 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
807 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
808 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
809 ; CGP-NEXT: v_mul_lo_u32 v10, v4, v3
810 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v2
811 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v2
812 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v2
813 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
814 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v11
815 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
816 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v11
817 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
818 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v10
819 ; CGP-NEXT: v_mul_lo_u32 v15, v3, v10
820 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10
821 ; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
822 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
823 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
824 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
825 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
826 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
827 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
828 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
829 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
830 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
831 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
832 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
833 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
834 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
835 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
836 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
837 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v10, vcc
838 ; CGP-NEXT: v_mul_lo_u32 v10, v4, v2
839 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
840 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v2
841 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
842 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v10
843 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v10
844 ; CGP-NEXT: v_mul_hi_u32 v10, v3, v10
845 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
846 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
847 ; CGP-NEXT: v_mul_lo_u32 v5, v2, v4
848 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v4
849 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v4
850 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
851 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
852 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
853 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
854 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
855 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
856 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
857 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
858 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
859 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
860 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
861 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
862 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
863 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
864 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
865 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
866 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc
867 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v2
868 ; CGP-NEXT: v_mul_hi_u32 v5, v8, v2
869 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
870 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v3
871 ; CGP-NEXT: v_mul_lo_u32 v11, v9, v3
872 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3
873 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3
874 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
875 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
876 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
877 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
878 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
879 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
880 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
881 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
882 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
883 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
884 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
885 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
886 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
887 ; CGP-NEXT: v_mul_lo_u32 v5, v6, v2
888 ; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
889 ; CGP-NEXT: v_mul_hi_u32 v11, v6, v2
890 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
891 ; CGP-NEXT: v_mul_lo_u32 v4, v6, v3
892 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2
893 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
894 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
895 ; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12
896 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc
897 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
898 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
899 ; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc
900 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4
901 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6
902 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
903 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7
904 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
905 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc
906 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7
907 ; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v9, vcc
908 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
909 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
910 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6
911 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
912 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7
913 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
914 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7
915 ; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc
916 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
917 ; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v10, vcc
918 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
919 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
920 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
921 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
922 ; CGP-NEXT: ; implicit-def: $vgpr6
923 ; CGP-NEXT: ; implicit-def: $vgpr8
924 ; CGP-NEXT: .LBB2_6: ; %Flow
925 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
926 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
927 ; CGP-NEXT: s_cbranch_execz .LBB2_8
929 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v6
930 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
931 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
932 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
933 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
934 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
935 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
936 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
937 ; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
938 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v6
939 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
940 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
941 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
942 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
943 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6
944 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
945 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
946 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
947 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
948 ; CGP-NEXT: v_mov_b32_e32 v3, 0
950 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
951 ; CGP-NEXT: s_setpc_b64 s[30:31]
952 %result = udiv <2 x i64> %num, %den
953 ret <2 x i64> %result
; Unsigned 64-bit division by the constant 4096 (2^12).
; The expected output below uses the check prefix shared by both RUN
; configurations: a single 64-bit logical shift right by 12 and no division
; expansion.
956 define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
957 ; CHECK-LABEL: v_udiv_i64_pow2k_denom:
959 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
960 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
961 ; CHECK-NEXT: s_setpc_b64 s[30:31]
962 %result = udiv i64 %num, 4096
; Two-element vector form of the division by 4096: each i64 element is divided
; by the same constant. The expected output (shared by both RUN configurations)
; is one 64-bit logical shift right by 12 per element, on v[0:1] and v[2:3].
966 define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
967 ; CHECK-LABEL: v_udiv_v2i64_pow2k_denom:
969 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
970 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
971 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 12
972 ; CHECK-NEXT: s_setpc_b64 s[30:31]
973 %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
974 ret <2 x i64> %result
; Unsigned 64-bit division by the non-power-of-two constant 1235195.
; The expected output (shared by both RUN configurations) contains no
; reciprocal or branchy expansion: it multiplies the two 32-bit halves of the
; numerator by the constants loaded into s4 and s5, sums the partial products
; while turning each carry-out into 0/1 with v_cndmask, and finishes with a
; 64-bit logical shift right by 20.
; NOTE(review): s4/s5 look like the low/high halves of a multiply-by-reciprocal
; constant for 1235195 -- confirm against the division-by-constant lowering.
978 define i64 @v_udiv_i64_oddk_denom(i64 %num) {
979 ; CHECK-LABEL: v_udiv_i64_oddk_denom:
981 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
982 ; CHECK-NEXT: s_mov_b32 s4, 0x1fb03c31
983 ; CHECK-NEXT: s_mov_b32 s5, 0xd9528440
984 ; CHECK-NEXT: v_mul_lo_u32 v2, v1, s4
985 ; CHECK-NEXT: v_mul_lo_u32 v3, v0, s5
986 ; CHECK-NEXT: v_mul_hi_u32 v4, v0, s4
987 ; CHECK-NEXT: v_mul_lo_u32 v5, v1, s5
988 ; CHECK-NEXT: v_mul_hi_u32 v6, v1, s4
989 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, s5
990 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v3
991 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
992 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
993 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
994 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
995 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
996 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v5, v0
997 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
998 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
999 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v6, v4
1000 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1001 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1002 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v3, v2
1003 ; CHECK-NEXT: v_mul_hi_u32 v1, v1, s5
1004 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1005 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
1006 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1007 %result = udiv i64 %num, 1235195
; Two-element vector form of the division by the constant 1235195.
; The expected output (shared by both RUN configurations) applies the same
; multiply / carry-accumulate / shift-right-by-20 sequence to each element,
; using the same s4 and s5 constants for both. The instructions for the two
; elements are interleaved; the results end up in v[0:1] and v[2:3].
1010 define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
1011 ; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
1013 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014 ; CHECK-NEXT: s_mov_b32 s4, 0x1fb03c31
1015 ; CHECK-NEXT: s_mov_b32 s5, 0xd9528440
1016 ; CHECK-NEXT: v_mul_lo_u32 v4, v1, s4
1017 ; CHECK-NEXT: v_mul_lo_u32 v5, v0, s5
1018 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, s4
1019 ; CHECK-NEXT: v_mul_lo_u32 v7, v1, s5
1020 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, s4
1021 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, s5
1022 ; CHECK-NEXT: v_mul_hi_u32 v1, v1, s5
1023 ; CHECK-NEXT: v_mul_lo_u32 v9, v3, s4
1024 ; CHECK-NEXT: v_mul_lo_u32 v10, v2, s5
1025 ; CHECK-NEXT: v_mul_hi_u32 v11, v2, s4
1026 ; CHECK-NEXT: v_mul_lo_u32 v12, v3, s5
1027 ; CHECK-NEXT: v_mul_hi_u32 v13, v3, s4
1028 ; CHECK-NEXT: v_mul_hi_u32 v2, v2, s5
1029 ; CHECK-NEXT: v_mul_hi_u32 v3, v3, s5
1030 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
1031 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1032 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
1033 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1034 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1035 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1036 ; CHECK-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1037 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1038 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
1039 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1040 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
1041 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1042 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v11
1043 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1044 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v12, v2
1045 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1046 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
1047 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v6
1048 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v7
1049 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v13, v9
1050 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1051 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1052 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v6
1053 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1054 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
1055 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v6
1056 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v4
1057 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v5
1058 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
1059 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 20
1060 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1061 %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
1062 ret <2 x i64> %result
; Unsigned 64-bit division of %x by (4096 << %y), i.e. a denominator that is
; only known at run time.
; The expected output (shared by both RUN configurations) is the general
; division expansion rather than a shift:
;   * the denominator is materialised with v_lshl_b64 into v[5:6];
;   * the high dwords of numerator (v4) and denominator (v6) are OR-ed and the
;     result is compared against zero to choose between two paths under exec
;     masking;
;   * %bb.1 (some high dword non-zero) is the long sequence built around
;     v_rcp_iflag_f32 on both dwords of the denominator, followed by
;     multiply/add-with-carry refinement and conditional +1/+2 corrections of
;     the quotient;
;   * %bb.3 (both high dwords zero) is the short 32-bit sequence on the low
;     dwords only (v3 / v5) and writes 0 to the high dword of the result.
1065 define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
1066 ; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
1068 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
1070 ; CHECK-NEXT: v_mov_b32_e32 v4, v1
1071 ; CHECK-NEXT: s_mov_b64 s[4:5], 0x1000
1072 ; CHECK-NEXT: v_lshl_b64 v[5:6], s[4:5], v2
1073 ; CHECK-NEXT: v_or_b32_e32 v1, v4, v6
1074 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
1075 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1076 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
1077 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
1078 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1079 ; CHECK-NEXT: s_cbranch_execz .LBB7_2
1080 ; CHECK-NEXT: ; %bb.1:
1081 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
1082 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, v6
1083 ; CHECK-NEXT: v_sub_i32_e32 v2, vcc, 0, v5
1084 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v6, vcc
1085 ; CHECK-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
1086 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
1087 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1088 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
1089 ; CHECK-NEXT: v_trunc_f32_e32 v1, v1
1090 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
1091 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
1092 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1093 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v1
1094 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v0
1095 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
1096 ; CHECK-NEXT: v_mul_hi_u32 v11, v2, v0
1097 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1098 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v9
1099 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
1100 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v9
1101 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
1102 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
1103 ; CHECK-NEXT: v_mul_lo_u32 v13, v1, v8
1104 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
1105 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
1106 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1107 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1108 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1109 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1110 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1111 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1112 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
1113 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1114 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1115 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
1116 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1117 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1118 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1119 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1120 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
1121 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v8, vcc
1122 ; CHECK-NEXT: v_mul_lo_u32 v8, v2, v0
1123 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
1124 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0
1125 ; CHECK-NEXT: v_mul_lo_u32 v2, v2, v1
1126 ; CHECK-NEXT: v_mul_lo_u32 v10, v1, v8
1127 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
1128 ; CHECK-NEXT: v_mul_hi_u32 v8, v1, v8
1129 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1130 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9
1131 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v2
1132 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v2
1133 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v2
1134 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
1135 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1136 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1137 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1138 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1139 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
1140 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1141 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1142 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1143 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1144 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1145 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1146 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1147 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1148 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1149 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1150 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
1151 ; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
1152 ; CHECK-NEXT: v_mul_hi_u32 v7, v3, v0
1153 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
1154 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v1
1155 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1
1156 ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1
1157 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
1158 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1159 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1160 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
1161 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1162 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1163 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1164 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
1165 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1166 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1167 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1168 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1169 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1170 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1171 ; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0
1172 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
1173 ; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0
1174 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1175 ; CHECK-NEXT: v_mul_lo_u32 v2, v5, v1
1176 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0
1177 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc
1178 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1179 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
1180 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
1181 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9
1182 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v7
1183 ; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v4, v2, vcc
1184 ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2
1185 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5
1186 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1187 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6
1188 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1189 ; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc
1190 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6
1191 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
1192 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
1193 ; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
1194 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
1195 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
1196 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
1197 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1198 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6
1199 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc
1200 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1201 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc
1202 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc
1203 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1204 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1205 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1206 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
1207 ; CHECK-NEXT: ; implicit-def: $vgpr3
1208 ; CHECK-NEXT: .LBB7_2: ; %Flow
1209 ; CHECK-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
1210 ; CHECK-NEXT: s_xor_b64 exec, exec, s[6:7]
1211 ; CHECK-NEXT: s_cbranch_execz .LBB7_4
1212 ; CHECK-NEXT: ; %bb.3:
1213 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v5
1214 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1215 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v0
1216 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1217 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1218 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
1219 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
1220 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1221 ; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1222 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5
1223 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1224 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1225 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1226 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1227 ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5
1228 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1229 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1230 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1231 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1232 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1233 ; CHECK-NEXT: .LBB7_4:
1234 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
1235 ; CHECK-NEXT: s_setpc_b64 s[30:31]
; The denominator is the constant 4096 shifted left by the run-time value %y,
; so unlike the constant-denominator tests it cannot be folded to a fixed shift.
1236 %shl.y = shl i64 4096, %y
1237 %r = udiv i64 %x, %shl.y
1241 define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1242 ; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
1244 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1245 ; GISEL-NEXT: s_mov_b64 s[4:5], 0x1000
1246 ; GISEL-NEXT: v_lshl_b64 v[7:8], s[4:5], v4
1247 ; GISEL-NEXT: v_lshl_b64 v[4:5], s[4:5], v6
1248 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v7
1249 ; GISEL-NEXT: v_cvt_f32_u32_e32 v9, v8
1250 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v9
1251 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
1252 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1253 ; GISEL-NEXT: v_mul_f32_e32 v9, 0x2f800000, v6
1254 ; GISEL-NEXT: v_trunc_f32_e32 v9, v9
1255 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v9
1256 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1257 ; GISEL-NEXT: v_cvt_u32_f32_e32 v9, v9
1258 ; GISEL-NEXT: v_sub_i32_e32 v10, vcc, 0, v7
1259 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, 0, v8, vcc
1260 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6
1261 ; GISEL-NEXT: v_mul_lo_u32 v13, v11, v6
1262 ; GISEL-NEXT: v_mul_lo_u32 v14, v10, v9
1263 ; GISEL-NEXT: v_mul_hi_u32 v15, v10, v6
1264 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1265 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1266 ; GISEL-NEXT: v_mul_lo_u32 v14, v9, v12
1267 ; GISEL-NEXT: v_mul_lo_u32 v15, v6, v13
1268 ; GISEL-NEXT: v_mul_hi_u32 v16, v6, v12
1269 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1270 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1271 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1272 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1273 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
1274 ; GISEL-NEXT: v_mul_lo_u32 v15, v9, v13
1275 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
1276 ; GISEL-NEXT: v_mul_hi_u32 v16, v6, v13
1277 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1278 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1279 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1280 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1281 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1282 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1283 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1284 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v15, v14
1285 ; GISEL-NEXT: v_mul_hi_u32 v13, v9, v13
1286 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1287 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1288 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v13, vcc
1289 ; GISEL-NEXT: v_mul_lo_u32 v12, v10, v6
1290 ; GISEL-NEXT: v_mul_lo_u32 v11, v11, v6
1291 ; GISEL-NEXT: v_mul_lo_u32 v13, v10, v9
1292 ; GISEL-NEXT: v_mul_hi_u32 v10, v10, v6
1293 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1294 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1295 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v12
1296 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v10
1297 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v12
1298 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1299 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1300 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v14
1301 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1302 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v11
1303 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v10
1304 ; GISEL-NEXT: v_mul_hi_u32 v12, v9, v12
1305 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10
1306 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1307 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1308 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1309 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1310 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1311 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v12, v11
1312 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1313 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1314 ; GISEL-NEXT: v_mul_hi_u32 v10, v9, v10
1315 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1316 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v11
1317 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v9, v10, vcc
1318 ; GISEL-NEXT: v_mul_lo_u32 v10, v1, v6
1319 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v9
1320 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v6
1321 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1322 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1323 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1324 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1325 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1326 ; GISEL-NEXT: v_mul_lo_u32 v11, v1, v9
1327 ; GISEL-NEXT: v_mul_hi_u32 v6, v1, v6
1328 ; GISEL-NEXT: v_mul_hi_u32 v12, v0, v9
1329 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6
1330 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1331 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v12
1332 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1333 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1334 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1335 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1336 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1337 ; GISEL-NEXT: v_mul_hi_u32 v9, v1, v9
1338 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1339 ; GISEL-NEXT: v_mul_lo_u32 v10, v7, v6
1340 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v6
1341 ; GISEL-NEXT: v_mul_lo_u32 v12, v7, v9
1342 ; GISEL-NEXT: v_mul_hi_u32 v13, v7, v6
1343 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1344 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1345 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
1346 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], v1, v11, vcc
1347 ; GISEL-NEXT: v_sub_i32_e64 v1, s[4:5], v1, v11
1348 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v8
1349 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
1350 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v7
1351 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
1352 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v10, v8
1353 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v11, v12, s[4:5]
1354 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v7
1355 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
1356 ; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[4:5]
1357 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v6
1358 ; GISEL-NEXT: v_addc_u32_e32 v12, vcc, 0, v9, vcc
1359 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8
1360 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc
1361 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v7
1362 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
1363 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v1, v8
1364 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v13, v0, vcc
1365 ; GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v11
1366 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v12, vcc
1367 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
1368 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v11, v1, vcc
1369 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v12, v7, vcc
1370 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
1371 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc
1372 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v1, vcc
1373 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v4
1374 ; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v5
1375 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7
1376 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
1377 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1378 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x2f800000, v6
1379 ; GISEL-NEXT: v_trunc_f32_e32 v7, v7
1380 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v7
1381 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1382 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1383 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4
1384 ; GISEL-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc
1385 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6
1386 ; GISEL-NEXT: v_mul_lo_u32 v11, v9, v6
1387 ; GISEL-NEXT: v_mul_lo_u32 v12, v8, v7
1388 ; GISEL-NEXT: v_mul_hi_u32 v13, v8, v6
1389 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1390 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1391 ; GISEL-NEXT: v_mul_lo_u32 v12, v7, v10
1392 ; GISEL-NEXT: v_mul_lo_u32 v13, v6, v11
1393 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v10
1394 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1395 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1396 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1397 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1398 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1399 ; GISEL-NEXT: v_mul_lo_u32 v13, v7, v11
1400 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
1401 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v11
1402 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v13, v10
1403 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1404 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1405 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1406 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1407 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1408 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1409 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1410 ; GISEL-NEXT: v_mul_hi_u32 v11, v7, v11
1411 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1412 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1413 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v11, vcc
1414 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v6
1415 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v6
1416 ; GISEL-NEXT: v_mul_lo_u32 v11, v8, v7
1417 ; GISEL-NEXT: v_mul_hi_u32 v8, v8, v6
1418 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1419 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1420 ; GISEL-NEXT: v_mul_lo_u32 v9, v7, v10
1421 ; GISEL-NEXT: v_mul_lo_u32 v11, v6, v8
1422 ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v10
1423 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1424 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1425 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
1426 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1427 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v11, v9
1428 ; GISEL-NEXT: v_mul_lo_u32 v11, v7, v8
1429 ; GISEL-NEXT: v_mul_hi_u32 v10, v7, v10
1430 ; GISEL-NEXT: v_mul_hi_u32 v12, v6, v8
1431 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1432 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1433 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1434 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1435 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1436 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
1437 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1438 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1439 ; GISEL-NEXT: v_mul_hi_u32 v8, v7, v8
1440 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1441 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
1442 ; GISEL-NEXT: v_addc_u32_e32 v7, vcc, v7, v8, vcc
1443 ; GISEL-NEXT: v_mul_lo_u32 v8, v3, v6
1444 ; GISEL-NEXT: v_mul_lo_u32 v9, v2, v7
1445 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v6
1446 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
1447 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1448 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1449 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1450 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1451 ; GISEL-NEXT: v_mul_lo_u32 v9, v3, v7
1452 ; GISEL-NEXT: v_mul_hi_u32 v6, v3, v6
1453 ; GISEL-NEXT: v_mul_hi_u32 v10, v2, v7
1454 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
1455 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1456 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v10
1457 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1458 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1459 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
1460 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1461 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1462 ; GISEL-NEXT: v_mul_hi_u32 v7, v3, v7
1463 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v8
1464 ; GISEL-NEXT: v_mul_lo_u32 v8, v4, v6
1465 ; GISEL-NEXT: v_mul_lo_u32 v9, v5, v6
1466 ; GISEL-NEXT: v_mul_lo_u32 v10, v4, v7
1467 ; GISEL-NEXT: v_mul_hi_u32 v11, v4, v6
1468 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1469 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1470 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v8
1471 ; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], v3, v9, vcc
1472 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v3, v9
1473 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v5
1474 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1475 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v4
1476 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5]
1477 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v8, v5
1478 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v9, v10, s[4:5]
1479 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v4
1480 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v5, vcc
1481 ; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[4:5]
1482 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v6
1483 ; GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v7, vcc
1484 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
1485 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
1486 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
1487 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
1488 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, v3, v5
1489 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v11, v2, vcc
1490 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v9
1491 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v10, vcc
1492 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1493 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v9, v3, vcc
1494 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v10, v4, vcc
1495 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
1496 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v6, v2, vcc
1497 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v7, v3, vcc
1498 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1500 ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
1502 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1503 ; CGP-NEXT: v_mov_b32_e32 v8, v0
1504 ; CGP-NEXT: v_mov_b32_e32 v9, v1
1505 ; CGP-NEXT: v_mov_b32_e32 v5, v2
1506 ; CGP-NEXT: v_mov_b32_e32 v7, v3
1507 ; CGP-NEXT: s_mov_b64 s[6:7], 0x1000
1508 ; CGP-NEXT: v_lshl_b64 v[2:3], s[6:7], v4
1509 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3
1510 ; CGP-NEXT: v_mov_b32_e32 v0, 0
1511 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1512 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
1513 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
1514 ; CGP-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
1515 ; CGP-NEXT: s_cbranch_execz .LBB8_2
1516 ; CGP-NEXT: ; %bb.1:
1517 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
1518 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v3
1519 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v2
1520 ; CGP-NEXT: v_subb_u32_e32 v10, vcc, 0, v3, vcc
1521 ; CGP-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
1522 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
1523 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1524 ; CGP-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
1525 ; CGP-NEXT: v_trunc_f32_e32 v1, v1
1526 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
1527 ; CGP-NEXT: v_cvt_u32_f32_e32 v1, v1
1528 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
1529 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v1
1530 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v0
1531 ; CGP-NEXT: v_mul_lo_u32 v13, v10, v0
1532 ; CGP-NEXT: v_mul_hi_u32 v14, v4, v0
1533 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v13, v11
1534 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v12
1535 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
1536 ; CGP-NEXT: v_mul_hi_u32 v12, v1, v12
1537 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v14
1538 ; CGP-NEXT: v_mul_lo_u32 v14, v0, v11
1539 ; CGP-NEXT: v_mul_lo_u32 v16, v1, v11
1540 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v11
1541 ; CGP-NEXT: v_mul_hi_u32 v11, v1, v11
1542 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
1543 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1544 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v16, v12
1545 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1546 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1547 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1548 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1549 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1550 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1551 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v16, v15
1552 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1553 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1554 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1555 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1556 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
1557 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v11, vcc
1558 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v0
1559 ; CGP-NEXT: v_mul_lo_u32 v10, v10, v0
1560 ; CGP-NEXT: v_mul_hi_u32 v12, v4, v0
1561 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v1
1562 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v11
1563 ; CGP-NEXT: v_mul_hi_u32 v14, v0, v11
1564 ; CGP-NEXT: v_mul_hi_u32 v11, v1, v11
1565 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
1566 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
1567 ; CGP-NEXT: v_mul_lo_u32 v10, v0, v4
1568 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v4
1569 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v4
1570 ; CGP-NEXT: v_mul_hi_u32 v4, v1, v4
1571 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10
1572 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1573 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
1574 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1575 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
1576 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1577 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v15
1578 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1579 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v13, v10
1580 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1581 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1582 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1583 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v12, v11
1584 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
1585 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v10
1586 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v1, v4, vcc
1587 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0
1588 ; CGP-NEXT: v_mul_hi_u32 v10, v8, v0
1589 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
1590 ; CGP-NEXT: v_mul_lo_u32 v11, v8, v1
1591 ; CGP-NEXT: v_mul_lo_u32 v12, v9, v1
1592 ; CGP-NEXT: v_mul_hi_u32 v13, v8, v1
1593 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1
1594 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
1595 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1596 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v12, v0
1597 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1598 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
1599 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1600 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
1601 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1602 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
1603 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
1604 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1605 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1606 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
1607 ; CGP-NEXT: v_mul_lo_u32 v10, v2, v0
1608 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v0
1609 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v0
1610 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
1611 ; CGP-NEXT: v_mul_lo_u32 v4, v2, v1
1612 ; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v0
1613 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v1, vcc
1614 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v11, v4
1615 ; CGP-NEXT: v_add_i32_e32 v11, vcc, 1, v13
1616 ; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v14, vcc
1617 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
1618 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v10
1619 ; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v9, v4, vcc
1620 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4
1621 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2
1622 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1623 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v3
1624 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[4:5]
1625 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc
1626 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v3
1627 ; CGP-NEXT: v_cndmask_b32_e32 v9, v12, v9, vcc
1628 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v2
1629 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1630 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
1631 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
1632 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3
1633 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
1634 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
1635 ; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1636 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1637 ; CGP-NEXT: v_cndmask_b32_e32 v2, v13, v11, vcc
1638 ; CGP-NEXT: v_cndmask_b32_e32 v3, v14, v15, vcc
1639 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
1640 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1641 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1642 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
1643 ; CGP-NEXT: ; implicit-def: $vgpr8
1644 ; CGP-NEXT: .LBB8_2: ; %Flow2
1645 ; CGP-NEXT: s_or_saveexec_b64 s[8:9], s[8:9]
1646 ; CGP-NEXT: v_lshl_b64 v[9:10], s[6:7], v6
1647 ; CGP-NEXT: s_xor_b64 exec, exec, s[8:9]
1648 ; CGP-NEXT: s_cbranch_execz .LBB8_4
1649 ; CGP-NEXT: ; %bb.3:
1650 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v2
1651 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
1652 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v0
1653 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1654 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
1655 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
1656 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
1657 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1658 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
1659 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v2
1660 ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1661 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
1662 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
1663 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1664 ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
1665 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1666 ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1667 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
1668 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1669 ; CGP-NEXT: v_mov_b32_e32 v1, 0
1670 ; CGP-NEXT: .LBB8_4:
1671 ; CGP-NEXT: s_or_b64 exec, exec, s[8:9]
1672 ; CGP-NEXT: v_or_b32_e32 v3, v7, v10
1673 ; CGP-NEXT: v_mov_b32_e32 v2, 0
1674 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1675 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
1676 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
1677 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1678 ; CGP-NEXT: s_cbranch_execz .LBB8_6
1679 ; CGP-NEXT: ; %bb.5:
1680 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9
1681 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v10
1682 ; CGP-NEXT: v_sub_i32_e32 v4, vcc, 0, v9
1683 ; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v10, vcc
1684 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
1685 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
1686 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
1687 ; CGP-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
1688 ; CGP-NEXT: v_trunc_f32_e32 v3, v3
1689 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
1690 ; CGP-NEXT: v_cvt_u32_f32_e32 v3, v3
1691 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1692 ; CGP-NEXT: v_mul_lo_u32 v8, v4, v3
1693 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v2
1694 ; CGP-NEXT: v_mul_lo_u32 v12, v6, v2
1695 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v2
1696 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8
1697 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v11
1698 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
1699 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v11
1700 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
1701 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v8
1702 ; CGP-NEXT: v_mul_lo_u32 v15, v3, v8
1703 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8
1704 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
1705 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1706 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1707 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
1708 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1709 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1710 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1711 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
1712 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1713 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1714 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
1715 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1716 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1717 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1718 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1719 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
1720 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v8, vcc
1721 ; CGP-NEXT: v_mul_lo_u32 v8, v4, v2
1722 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
1723 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v2
1724 ; CGP-NEXT: v_mul_lo_u32 v4, v4, v3
1725 ; CGP-NEXT: v_mul_lo_u32 v12, v3, v8
1726 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v8
1727 ; CGP-NEXT: v_mul_hi_u32 v8, v3, v8
1728 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
1729 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
1730 ; CGP-NEXT: v_mul_lo_u32 v6, v2, v4
1731 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v4
1732 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v4
1733 ; CGP-NEXT: v_mul_hi_u32 v4, v3, v4
1734 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
1735 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1736 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
1737 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1738 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
1739 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1740 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
1741 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1742 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
1743 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1744 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1745 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1746 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
1747 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
1748 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
1749 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc
1750 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v2
1751 ; CGP-NEXT: v_mul_hi_u32 v6, v5, v2
1752 ; CGP-NEXT: v_mul_hi_u32 v2, v7, v2
1753 ; CGP-NEXT: v_mul_lo_u32 v8, v5, v3
1754 ; CGP-NEXT: v_mul_lo_u32 v11, v7, v3
1755 ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3
1756 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3
1757 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
1758 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1759 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
1760 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1761 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
1762 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1763 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
1764 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1765 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
1766 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
1767 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1768 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1769 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
1770 ; CGP-NEXT: v_mul_lo_u32 v6, v9, v2
1771 ; CGP-NEXT: v_mul_lo_u32 v8, v10, v2
1772 ; CGP-NEXT: v_mul_hi_u32 v11, v9, v2
1773 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1774 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v3
1775 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2
1776 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
1777 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
1778 ; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12
1779 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc
1780 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
1781 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
1782 ; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc
1783 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4
1784 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v9
1785 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
1786 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10
1787 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
1788 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc
1789 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10
1790 ; CGP-NEXT: v_cndmask_b32_e32 v6, v11, v7, vcc
1791 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
1792 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1793 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v9
1794 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1795 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10
1796 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
1797 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10
1798 ; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc
1799 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1800 ; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc
1801 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
1802 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1803 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1804 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1805 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
1806 ; CGP-NEXT: ; implicit-def: $vgpr5
1807 ; CGP-NEXT: .LBB8_6: ; %Flow
1808 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
1809 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
1810 ; CGP-NEXT: s_cbranch_execz .LBB8_8
1811 ; CGP-NEXT: ; %bb.7:
1812 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v9
1813 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
1814 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v2
1815 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1816 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1817 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
1818 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
1819 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1820 ; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
1821 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
1822 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1823 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
1824 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
1825 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1826 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
1827 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
1828 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1829 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
1830 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1831 ; CGP-NEXT: v_mov_b32_e32 v3, 0
1832 ; CGP-NEXT: .LBB8_8:
1833 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
1834 ; CGP-NEXT: s_setpc_b64 s[30:31]
1835 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
1836 %r = udiv <2 x i64> %x, %shl.y
; Unsigned 64-bit division whose operands are both masked to their low 24 bits
; (16777215 == 0xffffff) before the udiv.
; Per the expected output below, neither run emits the long 64-bit expansion
; used by the other i64 tests in this file:
;   - the GISEL run uses the 32-bit reciprocal-based sequence (v_rcp_iflag_f32,
;     v_mul_hi_u32, then two compare/select correction steps);
;   - the CGP run uses a float sequence (v_rcp_f32, v_trunc_f32, v_mad_f32)
;     and re-masks the quotient with 0xffffff.
; Both runs finish by writing 0 to v1 (presumably the high dword of the i64
; result -- confirm against the calling convention).
1840 define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
1841 ; GISEL-LABEL: v_udiv_i64_24bit:
1843 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1844 ; GISEL-NEXT: s_mov_b32 s4, 0xffffff
1845 ; GISEL-NEXT: v_and_b32_e32 v0, s4, v0
1846 ; GISEL-NEXT: v_and_b32_e32 v1, s4, v2
1847 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
1848 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
1849 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
1850 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1851 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
1852 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
1853 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
1854 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1855 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
1856 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
1857 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1858 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
1859 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
1860 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1861 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
1862 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1863 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
1864 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
1865 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
1866 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
1867 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1869 ; CGP-LABEL: v_udiv_i64_24bit:
1871 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1872 ; CGP-NEXT: s_mov_b32 s4, 0xffffff
1873 ; CGP-NEXT: v_and_b32_e32 v0, s4, v0
1874 ; CGP-NEXT: v_and_b32_e32 v1, s4, v2
1875 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
1876 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
1877 ; CGP-NEXT: v_rcp_f32_e32 v2, v1
1878 ; CGP-NEXT: v_mul_f32_e32 v2, v0, v2
1879 ; CGP-NEXT: v_trunc_f32_e32 v2, v2
1880 ; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0
1881 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1882 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1
1883 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1884 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1885 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1886 ; CGP-NEXT: v_mov_b32_e32 v1, 0
1887 ; CGP-NEXT: s_setpc_b64 s[30:31]
1888 %num.mask = and i64 %num, 16777215
1889 %den.mask = and i64 %den, 16777215
1890 %result = udiv i64 %num.mask, %den.mask
1894 define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
1895 ; GISEL-LABEL: v_udiv_v2i64_24bit:
1897 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1898 ; GISEL-NEXT: s_mov_b32 s6, 0xffffff
1899 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v7, 0
1900 ; GISEL-NEXT: v_and_b32_e32 v1, s6, v4
1901 ; GISEL-NEXT: v_and_b32_e32 v3, s6, v6
1902 ; GISEL-NEXT: v_cvt_f32_u32_e32 v6, v1
1903 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
1904 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
1905 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v3
1906 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v3
1907 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
1908 ; GISEL-NEXT: v_mac_f32_e32 v6, 0x4f800000, v7
1909 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v7
1910 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v6
1911 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
1912 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1913 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
1914 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
1915 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
1916 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
1917 ; GISEL-NEXT: v_trunc_f32_e32 v11, v11
1918 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
1919 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
1920 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
1921 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
1922 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1923 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
1924 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1925 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
1926 ; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
1927 ; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
1928 ; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
1929 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
1930 ; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
1931 ; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
1932 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1933 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
1934 ; GISEL-NEXT: v_mul_lo_u32 v15, v11, v17
1935 ; GISEL-NEXT: v_mul_hi_u32 v18, v7, v17
1936 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
1937 ; GISEL-NEXT: v_mul_lo_u32 v19, v7, v13
1938 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
1939 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1940 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v18
1941 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
1942 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v14
1943 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
1944 ; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
1945 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v16
1946 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
1947 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v16
1948 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[4:5]
1949 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
1950 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v12
1951 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1952 ; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v16, v18
1953 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v12
1954 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v15, v14
1955 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[4:5]
1956 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v14, v18
1957 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
1958 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v18
1959 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1960 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
1961 ; GISEL-NEXT: v_mul_lo_u32 v19, v11, v13
1962 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
1963 ; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
1964 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1965 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
1966 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1967 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v20, v19
1968 ; GISEL-NEXT: s_bfe_i32 s4, -1, 0x10000
1969 ; GISEL-NEXT: s_bfe_i32 s5, -1, 0x10000
1970 ; GISEL-NEXT: s_bfe_i32 s7, -1, 0x10000
1971 ; GISEL-NEXT: s_bfe_i32 s8, -1, 0x10000
1972 ; GISEL-NEXT: v_and_b32_e32 v0, s6, v0
1973 ; GISEL-NEXT: v_and_b32_e32 v2, s6, v2
1974 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1975 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1976 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1977 ; GISEL-NEXT: v_mov_b32_e32 v16, s4
1978 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
1979 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1980 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v19, v18
1981 ; GISEL-NEXT: v_mov_b32_e32 v19, s5
1982 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
1983 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
1984 ; GISEL-NEXT: v_mov_b32_e32 v15, s7
1985 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
1986 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v18
1987 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
1988 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
1989 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
1990 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
1991 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
1992 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
1993 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
1994 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
1995 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
1996 ; GISEL-NEXT: v_mul_hi_u32 v17, v9, v7
1997 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
1998 ; GISEL-NEXT: v_mul_lo_u32 v18, v8, v12
1999 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
2000 ; GISEL-NEXT: v_mul_hi_u32 v5, v6, v12
2001 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2002 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
2003 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
2004 ; GISEL-NEXT: v_mul_lo_u32 v10, v11, v13
2005 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
2006 ; GISEL-NEXT: v_mul_hi_u32 v14, v7, v13
2007 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2008 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
2009 ; GISEL-NEXT: v_mul_lo_u32 v17, v7, v9
2010 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
2011 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2012 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
2013 ; GISEL-NEXT: v_mul_lo_u32 v10, v6, v4
2014 ; GISEL-NEXT: v_mul_lo_u32 v14, v8, v4
2015 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
2016 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
2017 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v10, v5
2018 ; GISEL-NEXT: v_mul_hi_u32 v5, v6, v4
2019 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2020 ; GISEL-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
2021 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v9
2022 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12
2023 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
2024 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v12, v5
2025 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2026 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v14, v12
2027 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2028 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v17, v14
2029 ; GISEL-NEXT: v_mul_hi_u32 v17, v7, v9
2030 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
2031 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2032 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v17
2033 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2034 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v18, v17
2035 ; GISEL-NEXT: v_mov_b32_e32 v18, s8
2036 ; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
2037 ; GISEL-NEXT: v_mul_hi_u32 v9, v11, v9
2038 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
2039 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2040 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
2041 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2042 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
2043 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v14
2044 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
2045 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v12
2046 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v6, v5
2047 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
2048 ; GISEL-NEXT: v_mul_lo_u32 v6, 0, v5
2049 ; GISEL-NEXT: v_mul_hi_u32 v8, v0, v5
2050 ; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
2051 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
2052 ; GISEL-NEXT: v_addc_u32_e32 v9, vcc, v11, v9, vcc
2053 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
2054 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
2055 ; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
2056 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v4
2057 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
2058 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v4
2059 ; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
2060 ; GISEL-NEXT: v_mul_lo_u32 v17, v2, v9
2061 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
2062 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2063 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2064 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v9
2065 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v9
2066 ; GISEL-NEXT: v_mul_hi_u32 v9, 0, v9
2067 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v12
2068 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
2069 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v13, v5
2070 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
2071 ; GISEL-NEXT: v_add_i32_e64 v7, s[4:5], v10, v7
2072 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, s[4:5]
2073 ; GISEL-NEXT: v_add_i32_e64 v6, s[4:5], v6, v8
2074 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[4:5]
2075 ; GISEL-NEXT: v_add_i32_e64 v5, s[4:5], v5, v14
2076 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
2077 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2078 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v11
2079 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2080 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v12, v6
2081 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v13, v8
2082 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v17, v14
2083 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2084 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v6
2085 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
2086 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v12
2087 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2088 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v8, v6
2089 ; GISEL-NEXT: v_mul_lo_u32 v8, v1, v5
2090 ; GISEL-NEXT: v_mul_lo_u32 v12, 0, v5
2091 ; GISEL-NEXT: v_mul_hi_u32 v13, v1, v5
2092 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2093 ; GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
2094 ; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7
2095 ; GISEL-NEXT: v_mul_hi_u32 v17, v3, v7
2096 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v6
2097 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v10
2098 ; GISEL-NEXT: v_mul_lo_u32 v9, v1, v4
2099 ; GISEL-NEXT: v_mul_lo_u32 v10, v3, v6
2100 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
2101 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v14, v10
2102 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v5
2103 ; GISEL-NEXT: v_addc_u32_e32 v14, vcc, 0, v4, vcc
2104 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
2105 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v17
2106 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v8
2107 ; GISEL-NEXT: v_subb_u32_e64 v8, s[4:5], 0, v9, vcc
2108 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v0, v1
2109 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
2110 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v8
2111 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], 1, v7
2112 ; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, v6, s[6:7]
2113 ; GISEL-NEXT: v_sub_i32_e64 v2, s[6:7], v2, v11
2114 ; GISEL-NEXT: v_subb_u32_e64 v11, s[8:9], 0, v10, s[6:7]
2115 ; GISEL-NEXT: v_cndmask_b32_e64 v13, v16, v13, s[4:5]
2116 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
2117 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[4:5]
2118 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v11
2119 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v15, v16, s[4:5]
2120 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], 1, v12
2121 ; GISEL-NEXT: v_addc_u32_e64 v16, s[4:5], 0, v14, s[4:5]
2122 ; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 0, v10
2123 ; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v10, s[6:7]
2124 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v3
2125 ; GISEL-NEXT: v_subbrev_u32_e64 v10, s[4:5], 0, v10, s[4:5]
2126 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v2, v3
2127 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 1, v8
2128 ; GISEL-NEXT: v_addc_u32_e64 v3, s[6:7], 0, v17, s[6:7]
2129 ; GISEL-NEXT: v_sub_i32_e64 v9, s[6:7], 0, v9
2130 ; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
2131 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
2132 ; GISEL-NEXT: v_subbrev_u32_e32 v9, vcc, 0, v9, vcc
2133 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
2134 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
2135 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, s[4:5]
2136 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v9
2137 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v19, v0, vcc
2138 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v10
2139 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v18, v1, vcc
2140 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
2141 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v12, v15, vcc
2142 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v1
2143 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v8, v2, s[4:5]
2144 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v14, v16, vcc
2145 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v13
2146 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc
2147 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, v3, s[4:5]
2148 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
2149 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v1, s[4:5]
2150 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v8, vcc
2151 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v6, v3, s[4:5]
2152 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2154 ; CGP-LABEL: v_udiv_v2i64_24bit:
2156 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2157 ; CGP-NEXT: s_mov_b32 s6, 0xffffff
2158 ; CGP-NEXT: v_and_b32_e32 v0, s6, v0
2159 ; CGP-NEXT: v_and_b32_e32 v1, s6, v2
2160 ; CGP-NEXT: v_and_b32_e32 v2, s6, v4
2161 ; CGP-NEXT: v_and_b32_e32 v3, s6, v6
2162 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
2163 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2
2164 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
2165 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3
2166 ; CGP-NEXT: v_rcp_f32_e32 v4, v2
2167 ; CGP-NEXT: v_rcp_f32_e32 v5, v3
2168 ; CGP-NEXT: v_mul_f32_e32 v4, v0, v4
2169 ; CGP-NEXT: v_mul_f32_e32 v5, v1, v5
2170 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2171 ; CGP-NEXT: v_trunc_f32_e32 v5, v5
2172 ; CGP-NEXT: v_mad_f32 v0, -v4, v2, v0
2173 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2174 ; CGP-NEXT: v_mad_f32 v1, -v5, v3, v1
2175 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
2176 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
2177 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2178 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, v3
2179 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
2180 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
2181 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
2182 ; CGP-NEXT: v_and_b32_e32 v0, s6, v0
2183 ; CGP-NEXT: v_and_b32_e32 v2, s6, v1
2184 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2185 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2186 ; CGP-NEXT: s_setpc_b64 s[30:31]
2187 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2188 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2189 %result = udiv <2 x i64> %num.mask, %den.mask
2190 ret <2 x i64> %result