1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=1 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,GISEL %s
3 ; RUN: llc -global-isel -amdgpu-codegenprepare-disable-idiv-expansion=0 -mtriple=amdgcn-amd-amdpal -denormal-fp-math-f32=preserve-sign -mattr=+mad-mac-f32-insts < %s | FileCheck -check-prefixes=CHECK,CGP %s
5 ; The same 32-bit expansion is implemented in the legalizer and in AMDGPUCodeGenPrepare.
; Divergent 64-bit unsigned division: operands and result live in VGPRs.
; The assertions use the shared CHECK prefix, so both RUN configurations are
; expected to emit identical code for this function.
; Expected shape of the output: v3 and the copy of v1 (presumably the high
; dwords of %den and %num - confirm against the calling convention) are ORed
; and compared against zero under an exec mask. Lanes where that value is
; nonzero run the full 64-bit expansion (.LBB0_3); the remaining lanes run the
; 32-bit expansion (.LBB0_4), which writes 0 to the high result dword (v1).
7 define i64 @v_udiv_i64(i64 %num, i64 %den) {
8 ; CHECK-LABEL: v_udiv_i64:
10 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11 ; CHECK-NEXT: v_mov_b32_e32 v4, v0
12 ; CHECK-NEXT: v_mov_b32_e32 v5, v1
13 ; CHECK-NEXT: v_or_b32_e32 v1, v5, v3
14 ; CHECK-NEXT: v_mov_b32_e32 v0, 0
15 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
16 ; CHECK-NEXT: v_cvt_f32_u32_e32 v6, v2
17 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
18 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
19 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
20 ; CHECK-NEXT: s_cbranch_execnz .LBB0_3
21 ; CHECK-NEXT: ; %bb.1: ; %Flow
22 ; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
23 ; CHECK-NEXT: s_cbranch_execnz .LBB0_4
24 ; CHECK-NEXT: .LBB0_2:
25 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
26 ; CHECK-NEXT: s_setpc_b64 s[30:31]
27 ; CHECK-NEXT: .LBB0_3:
28 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v3
29 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
30 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
31 ; CHECK-NEXT: v_mac_f32_e32 v6, 0x4f800000, v0
32 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6
33 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
34 ; CHECK-NEXT: v_mul_f32_e32 v6, 0x2f800000, v0
35 ; CHECK-NEXT: v_trunc_f32_e32 v6, v6
36 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v6
37 ; CHECK-NEXT: v_cvt_u32_f32_e32 v6, v6
38 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
39 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v6
40 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v0
41 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
42 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0
43 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
44 ; CHECK-NEXT: v_mul_lo_u32 v10, v6, v9
45 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
46 ; CHECK-NEXT: v_mul_hi_u32 v9, v6, v9
47 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
48 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
49 ; CHECK-NEXT: v_mul_lo_u32 v13, v6, v8
50 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
51 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8
52 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
53 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
54 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
55 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
56 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
57 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
58 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
59 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
60 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
61 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
62 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
63 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
64 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
65 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
66 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
67 ; CHECK-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc
68 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0
69 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
70 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0
71 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v6
72 ; CHECK-NEXT: v_mul_lo_u32 v10, v6, v8
73 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
74 ; CHECK-NEXT: v_mul_hi_u32 v8, v6, v8
75 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1
76 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
77 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1
78 ; CHECK-NEXT: v_mul_lo_u32 v9, v6, v1
79 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1
80 ; CHECK-NEXT: v_mul_hi_u32 v1, v6, v1
81 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
82 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
83 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
84 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
85 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
86 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
87 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
88 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
89 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
90 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
91 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
92 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
93 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
94 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
95 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
96 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v6, v1, vcc
97 ; CHECK-NEXT: v_mul_lo_u32 v6, v5, v0
98 ; CHECK-NEXT: v_mul_hi_u32 v7, v4, v0
99 ; CHECK-NEXT: v_mul_hi_u32 v0, v5, v0
100 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v1
101 ; CHECK-NEXT: v_mul_lo_u32 v9, v5, v1
102 ; CHECK-NEXT: v_mul_hi_u32 v10, v4, v1
103 ; CHECK-NEXT: v_mul_hi_u32 v1, v5, v1
104 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
105 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
106 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
107 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
108 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
109 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
110 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
111 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
112 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
113 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
114 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v6
115 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
116 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v7, v6
117 ; CHECK-NEXT: v_mul_lo_u32 v7, v2, v0
118 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v0
119 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v0
120 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
121 ; CHECK-NEXT: v_mul_lo_u32 v6, v2, v1
122 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0
123 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc
124 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
125 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
126 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
127 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v9
128 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v7
129 ; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v5, v6, vcc
130 ; CHECK-NEXT: v_sub_i32_e64 v5, s[4:5], v5, v6
131 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v2
132 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5]
133 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v3
134 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
135 ; CHECK-NEXT: v_subb_u32_e32 v5, vcc, v5, v3, vcc
136 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v3
137 ; CHECK-NEXT: v_cndmask_b32_e32 v6, v9, v6, vcc
138 ; CHECK-NEXT: v_sub_i32_e32 v4, vcc, v4, v2
139 ; CHECK-NEXT: v_subbrev_u32_e32 v5, vcc, 0, v5, vcc
140 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v4, v2
141 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
142 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v5, v3
143 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
144 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v5, v3
145 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
146 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
147 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc
148 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc
149 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
150 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
151 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
152 ; CHECK-NEXT: ; implicit-def: $vgpr6
153 ; CHECK-NEXT: ; implicit-def: $vgpr2
154 ; CHECK-NEXT: ; implicit-def: $vgpr4
155 ; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
156 ; CHECK-NEXT: s_cbranch_execz .LBB0_2
157 ; CHECK-NEXT: .LBB0_4:
158 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v6
159 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
160 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
161 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
162 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
163 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
164 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
165 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
166 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
167 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
168 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
169 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
170 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
171 ; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
172 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
173 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
174 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
175 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
176 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
177 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
178 ; CHECK-NEXT: s_setpc_b64 s[30:31]
179 %result = udiv i64 %num, %den
183 ; FIXME: This is a workaround for not handling the uniform VGPR case.
184 declare i32 @llvm.amdgcn.readfirstlane(i32)
; Uniform 64-bit unsigned division in an amdgpu_ps shader: both operands are
; passed inreg and the expected code reads them from s[0:1] and s[2:3].
; The assertions use the shared CHECK prefix, so both RUN configurations are
; expected to emit identical code for this function.
; The quotient is split into two i32 halves, each half is passed through
; llvm.amdgcn.readfirstlane, and the halves are reassembled into an i64.
; NOTE(review): %ins.1 inserts %res.0 into both vector lanes, so %res.1 is
; unused and the expected code only copies s0 into s1. This looks like a typo
; for %res.1 - confirm. Changing it requires regenerating the assertions with
; utils/update_llc_test_checks.py.
186 define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
187 ; CHECK-LABEL: s_udiv_i64:
189 ; CHECK-NEXT: s_or_b64 s[4:5], s[0:1], s[2:3]
190 ; CHECK-NEXT: s_mov_b32 s6, 0
191 ; CHECK-NEXT: s_mov_b32 s7, -1
192 ; CHECK-NEXT: s_and_b64 s[4:5], s[4:5], s[6:7]
193 ; CHECK-NEXT: v_cmp_ne_u64_e64 vcc, s[4:5], 0
194 ; CHECK-NEXT: s_mov_b32 s4, 1
195 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, s2
196 ; CHECK-NEXT: s_cbranch_vccz .LBB1_2
197 ; CHECK-NEXT: ; %bb.1:
198 ; CHECK-NEXT: v_mov_b32_e32 v0, s3
199 ; CHECK-NEXT: v_cvt_f32_u32_e32 v1, s3
200 ; CHECK-NEXT: s_sub_u32 s4, 0, s2
201 ; CHECK-NEXT: v_mov_b32_e32 v3, s1
202 ; CHECK-NEXT: v_madmk_f32 v1, v1, 0x4f800000, v2
203 ; CHECK-NEXT: s_subb_u32 s5, 0, s3
204 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v1, v1
205 ; CHECK-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
206 ; CHECK-NEXT: v_mul_f32_e32 v4, 0x2f800000, v1
207 ; CHECK-NEXT: v_trunc_f32_e32 v4, v4
208 ; CHECK-NEXT: v_mac_f32_e32 v1, 0xcf800000, v4
209 ; CHECK-NEXT: v_cvt_u32_f32_e32 v4, v4
210 ; CHECK-NEXT: v_cvt_u32_f32_e32 v1, v1
211 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v4
212 ; CHECK-NEXT: v_mul_lo_u32 v6, s4, v1
213 ; CHECK-NEXT: v_mul_lo_u32 v7, s5, v1
214 ; CHECK-NEXT: v_mul_hi_u32 v8, s4, v1
215 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
216 ; CHECK-NEXT: v_mul_lo_u32 v7, v4, v6
217 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v6
218 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
219 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
220 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v5
221 ; CHECK-NEXT: v_mul_lo_u32 v10, v4, v5
222 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v5
223 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
224 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v8
225 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
226 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v10, v6
227 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
228 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v9
229 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
230 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v11
231 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
232 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
233 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v9
234 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
235 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
236 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
237 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
238 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v6
239 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v5, vcc
240 ; CHECK-NEXT: v_mul_lo_u32 v5, s4, v1
241 ; CHECK-NEXT: v_mul_lo_u32 v6, s5, v1
242 ; CHECK-NEXT: v_mul_hi_u32 v7, s4, v1
243 ; CHECK-NEXT: v_mul_lo_u32 v8, s4, v4
244 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v5
245 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v5
246 ; CHECK-NEXT: v_mul_hi_u32 v5, v4, v5
247 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v8
248 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
249 ; CHECK-NEXT: v_mul_lo_u32 v7, v1, v6
250 ; CHECK-NEXT: v_mul_lo_u32 v8, v4, v6
251 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v6
252 ; CHECK-NEXT: v_mul_hi_u32 v6, v4, v6
253 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
254 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
255 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v8, v5
256 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
257 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v10
258 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
259 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v11
260 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
261 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
262 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
263 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
264 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
265 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
266 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v6, v7
267 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5
268 ; CHECK-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc
269 ; CHECK-NEXT: v_mul_lo_u32 v5, s1, v1
270 ; CHECK-NEXT: v_mul_hi_u32 v6, s0, v1
271 ; CHECK-NEXT: v_mul_hi_u32 v1, s1, v1
272 ; CHECK-NEXT: v_mul_lo_u32 v7, s0, v4
273 ; CHECK-NEXT: v_mul_lo_u32 v8, s1, v4
274 ; CHECK-NEXT: v_mul_hi_u32 v9, s0, v4
275 ; CHECK-NEXT: v_mul_hi_u32 v4, s1, v4
276 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v7
277 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
278 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v8, v1
279 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
280 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v6
281 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
282 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
283 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
284 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v7, v5
285 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v8, v6
286 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5
287 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
288 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
289 ; CHECK-NEXT: v_mul_lo_u32 v6, s2, v1
290 ; CHECK-NEXT: v_mul_lo_u32 v7, s3, v1
291 ; CHECK-NEXT: v_mul_hi_u32 v8, s2, v1
292 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, 1, v1
293 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
294 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v9
295 ; CHECK-NEXT: v_mul_lo_u32 v4, s2, v4
296 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v7, v4
297 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v8
298 ; CHECK-NEXT: v_sub_i32_e32 v6, vcc, s0, v6
299 ; CHECK-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v4, vcc
300 ; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], s1, v4
301 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s2, v6
302 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
303 ; CHECK-NEXT: v_cmp_le_u32_e64 s[4:5], s3, v3
304 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5]
305 ; CHECK-NEXT: v_subb_u32_e32 v0, vcc, v4, v0, vcc
306 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v3
307 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v8, v7, vcc
308 ; CHECK-NEXT: v_subrev_i32_e32 v4, vcc, s2, v6
309 ; CHECK-NEXT: v_subbrev_u32_e32 v0, vcc, 0, v0, vcc
310 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v4
311 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
312 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s3, v0
313 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
314 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s3, v0
315 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc
316 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
317 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v9, v5, vcc
318 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
319 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
320 ; CHECK-NEXT: s_mov_b32 s4, 0
321 ; CHECK-NEXT: s_branch .LBB1_3
322 ; CHECK-NEXT: .LBB1_2:
323 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
324 ; CHECK-NEXT: .LBB1_3: ; %Flow
325 ; CHECK-NEXT: s_xor_b32 s1, s4, 1
326 ; CHECK-NEXT: s_and_b32 s1, s1, 1
327 ; CHECK-NEXT: s_cmp_lg_u32 s1, 0
328 ; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
329 ; CHECK-NEXT: ; %bb.4:
330 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
331 ; CHECK-NEXT: s_sub_i32 s1, 0, s2
332 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
333 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
334 ; CHECK-NEXT: v_mul_lo_u32 v1, s1, v0
335 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
336 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
337 ; CHECK-NEXT: v_mul_hi_u32 v0, s0, v0
338 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, s2
339 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
340 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, s0, v1
341 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
342 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
343 ; CHECK-NEXT: v_subrev_i32_e64 v2, s[0:1], s2, v1
344 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
345 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
346 ; CHECK-NEXT: v_cmp_le_u32_e32 vcc, s2, v1
347 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
348 ; CHECK-NEXT: .LBB1_5:
349 ; CHECK-NEXT: v_readfirstlane_b32 s0, v0
350 ; CHECK-NEXT: s_mov_b32 s1, s0
351 ; CHECK-NEXT: ; return to shader part epilog
352 %result = udiv i64 %num, %den
353 %cast = bitcast i64 %result to <2 x i32>
354 %elt.0 = extractelement <2 x i32> %cast, i32 0
355 %elt.1 = extractelement <2 x i32> %cast, i32 1
356 %res.0 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.0)
357 %res.1 = call i32 @llvm.amdgcn.readfirstlane(i32 %elt.1)
358 %ins.0 = insertelement <2 x i32> undef, i32 %res.0, i32 0
359 %ins.1 = insertelement <2 x i32> %ins.0, i32 %res.0, i32 1
360 %cast.back = bitcast <2 x i32> %ins.1 to i64
364 define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
365 ; GISEL-LABEL: v_udiv_v2i64:
367 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
368 ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v4
369 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v5
370 ; GISEL-NEXT: v_sub_i32_e32 v8, vcc, 0, v4
371 ; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v6
372 ; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v7
373 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v6
374 ; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v5, vcc
375 ; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v7, s[4:5]
376 ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11
377 ; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
378 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10
379 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
380 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10
381 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
382 ; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10
383 ; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11
384 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13
385 ; GISEL-NEXT: v_trunc_f32_e32 v14, v14
386 ; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13
387 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
388 ; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14
389 ; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14
390 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10
391 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v13
392 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
393 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14
394 ; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11
395 ; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11
396 ; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11
397 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
398 ; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18
399 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
400 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17
401 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
402 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18
403 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20
404 ; GISEL-NEXT: v_mul_lo_u32 v19, v8, v10
405 ; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10
406 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16
407 ; GISEL-NEXT: v_mul_hi_u32 v20, v8, v10
408 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
409 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19
410 ; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16
411 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
412 ; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19
413 ; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21
414 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19
415 ; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18
416 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16
417 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
418 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17
419 ; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18
420 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16
421 ; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20
422 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17
423 ; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20
424 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
425 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9]
426 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
427 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11]
428 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15]
429 ; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22
430 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
431 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5]
432 ; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23
433 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13]
434 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17]
435 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24
436 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
437 ; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22
438 ; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19
439 ; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16
440 ; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17
441 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18
442 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
443 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18
444 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
445 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19
446 ; GISEL-NEXT: v_mul_lo_u32 v20, v8, v10
447 ; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10
448 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
449 ; GISEL-NEXT: v_mul_hi_u32 v18, v8, v10
450 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
451 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
452 ; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11
453 ; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
454 ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11
455 ; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
456 ; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20
457 ; GISEL-NEXT: v_mul_lo_u32 v8, v8, v13
458 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v15, v8
459 ; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19
460 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14
461 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
462 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20
463 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20
464 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v18
465 ; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19
466 ; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19
467 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16
468 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v8
469 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
470 ; GISEL-NEXT: v_mul_lo_u32 v16, v13, v8
471 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17
472 ; GISEL-NEXT: v_mul_hi_u32 v12, v10, v8
473 ; GISEL-NEXT: v_mul_hi_u32 v8, v13, v8
474 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9
475 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
476 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9
477 ; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17
478 ; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9
479 ; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9
480 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
481 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
482 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15
483 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
484 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
485 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12
486 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11]
487 ; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18
488 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
489 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19
490 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
491 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20
492 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
493 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
494 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
495 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
496 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19
497 ; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17
498 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
499 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
500 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
501 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17
502 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18
503 ; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19
504 ; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10
505 ; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10
506 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10
507 ; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11
508 ; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11
509 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11
510 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v12
511 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
512 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v13, v8, vcc
513 ; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
514 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v8
515 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v8
516 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v8
517 ; GISEL-NEXT: v_mul_hi_u32 v15, v1, v8
518 ; GISEL-NEXT: v_mul_lo_u32 v8, v2, v9
519 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9
520 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
521 ; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9
522 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9
523 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10
524 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v18, v8
525 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11
526 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
527 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
528 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
529 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
530 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
531 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14
532 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], v8, v19
533 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16
534 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
535 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
536 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
537 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
538 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v11, v8
539 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14
540 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16
541 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
542 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
543 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13
544 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
545 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
546 ; GISEL-NEXT: v_mul_lo_u32 v14, v4, v8
547 ; GISEL-NEXT: v_mul_lo_u32 v16, v5, v8
548 ; GISEL-NEXT: v_mul_hi_u32 v17, v4, v8
549 ; GISEL-NEXT: v_mul_lo_u32 v18, v6, v9
550 ; GISEL-NEXT: v_mul_lo_u32 v19, v7, v9
551 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
552 ; GISEL-NEXT: v_mul_hi_u32 v11, v6, v9
553 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
554 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v8
555 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14
556 ; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9
557 ; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18
558 ; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13
559 ; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10
560 ; GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14
561 ; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12
562 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v4
563 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v6
564 ; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v4
565 ; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v6
566 ; GISEL-NEXT: v_mul_lo_u32 v20, v4, v10
567 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v4
568 ; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc
569 ; GISEL-NEXT: v_mul_lo_u32 v4, v6, v12
570 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
571 ; GISEL-NEXT: v_addc_u32_e64 v2, s[6:7], 0, v12, s[6:7]
572 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[14:15]
573 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
574 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[16:17]
575 ; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v19, v4
576 ; GISEL-NEXT: v_addc_u32_e64 v19, s[6:7], 0, v0, s[10:11]
577 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v17
578 ; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], 0, v2, s[12:13]
579 ; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v11
580 ; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], v1, v16, s[4:5]
581 ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v16
582 ; GISEL-NEXT: v_subb_u32_e64 v16, s[6:7], v3, v4, s[8:9]
583 ; GISEL-NEXT: v_sub_i32_e64 v3, s[6:7], v3, v4
584 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[22:23]
585 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v11, v5
586 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v11, v5
587 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
588 ; GISEL-NEXT: v_subb_u32_e64 v1, vcc, v1, v5, s[4:5]
589 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v16, v7
590 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], v3, v7, s[8:9]
591 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], v16, v7
592 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[6:7]
593 ; GISEL-NEXT: v_cndmask_b32_e64 v6, v16, v6, s[10:11]
594 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc
595 ; GISEL-NEXT: v_subbrev_u32_e64 v1, vcc, 0, v1, s[18:19]
596 ; GISEL-NEXT: v_subbrev_u32_e64 v3, vcc, 0, v3, s[20:21]
597 ; GISEL-NEXT: v_cndmask_b32_e64 v16, v16, v20, s[4:5]
598 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
599 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v7
600 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v1, v5
601 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v3, v7
602 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
603 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[4:5]
604 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
605 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v16
606 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v1, v4, s[6:7]
607 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[8:9]
608 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v1
609 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3
610 ; GISEL-NEXT: v_cndmask_b32_e64 v1, v13, v18, s[6:7]
611 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9]
612 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v19, s[6:7]
613 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v2, v17, s[8:9]
614 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v8, v1, vcc
615 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5]
616 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc
617 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5]
618 ; GISEL-NEXT: s_setpc_b64 s[30:31]
620 ; CGP-LABEL: v_udiv_v2i64:
622 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
623 ; CGP-NEXT: v_mov_b32_e32 v10, v0
624 ; CGP-NEXT: v_mov_b32_e32 v11, v1
625 ; CGP-NEXT: v_mov_b32_e32 v8, v2
626 ; CGP-NEXT: v_mov_b32_e32 v9, v3
627 ; CGP-NEXT: v_or_b32_e32 v1, v11, v5
628 ; CGP-NEXT: v_mov_b32_e32 v0, 0
629 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
630 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v4
631 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
632 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
633 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
634 ; CGP-NEXT: s_cbranch_execz .LBB2_2
636 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v5
637 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
638 ; CGP-NEXT: v_subb_u32_e32 v3, vcc, 0, v5, vcc
639 ; CGP-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0
640 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2
641 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
642 ; CGP-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
643 ; CGP-NEXT: v_trunc_f32_e32 v2, v2
644 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
645 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
646 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
647 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v2
648 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v0
649 ; CGP-NEXT: v_mul_lo_u32 v14, v3, v0
650 ; CGP-NEXT: v_mul_hi_u32 v15, v1, v0
651 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
652 ; CGP-NEXT: v_mul_lo_u32 v14, v2, v13
653 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
654 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v13
655 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v15
656 ; CGP-NEXT: v_mul_lo_u32 v15, v0, v12
657 ; CGP-NEXT: v_mul_lo_u32 v17, v2, v12
658 ; CGP-NEXT: v_mul_hi_u32 v18, v0, v12
659 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12
660 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
661 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
662 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v17, v13
663 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
664 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
665 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
666 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v18
667 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
668 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
669 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v17, v16
670 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v14
671 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
672 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v15, v14
673 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
674 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v13
675 ; CGP-NEXT: v_addc_u32_e32 v2, vcc, v2, v12, vcc
676 ; CGP-NEXT: v_mul_lo_u32 v12, v1, v0
677 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v0
678 ; CGP-NEXT: v_mul_hi_u32 v13, v1, v0
679 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v2
680 ; CGP-NEXT: v_mul_lo_u32 v14, v2, v12
681 ; CGP-NEXT: v_mul_hi_u32 v15, v0, v12
682 ; CGP-NEXT: v_mul_hi_u32 v12, v2, v12
683 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v3, v1
684 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
685 ; CGP-NEXT: v_mul_lo_u32 v3, v0, v1
686 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v1
687 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v1
688 ; CGP-NEXT: v_mul_hi_u32 v1, v2, v1
689 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
690 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
691 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
692 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
693 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v15
694 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
695 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
696 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
697 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v14, v3
698 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
699 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v12, v3
700 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
701 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
702 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v12
703 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v3
704 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
705 ; CGP-NEXT: v_mul_lo_u32 v2, v11, v0
706 ; CGP-NEXT: v_mul_hi_u32 v3, v10, v0
707 ; CGP-NEXT: v_mul_hi_u32 v0, v11, v0
708 ; CGP-NEXT: v_mul_lo_u32 v12, v10, v1
709 ; CGP-NEXT: v_mul_lo_u32 v13, v11, v1
710 ; CGP-NEXT: v_mul_hi_u32 v14, v10, v1
711 ; CGP-NEXT: v_mul_hi_u32 v1, v11, v1
712 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
713 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
714 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v13, v0
715 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
716 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
717 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
718 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
719 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
720 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
721 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v13, v3
722 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v2
723 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
724 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v3, v2
725 ; CGP-NEXT: v_mul_lo_u32 v3, v4, v0
726 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v0
727 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v0
728 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v2
729 ; CGP-NEXT: v_mul_lo_u32 v2, v4, v1
730 ; CGP-NEXT: v_add_i32_e32 v14, vcc, 1, v0
731 ; CGP-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc
732 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v12, v2
733 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v14
734 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v15, vcc
735 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v13
736 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v10, v3
737 ; CGP-NEXT: v_subb_u32_e64 v10, s[4:5], v11, v2, vcc
738 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v11, v2
739 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v4
740 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
741 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v10, v5
742 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[4:5]
743 ; CGP-NEXT: v_subb_u32_e32 v2, vcc, v2, v5, vcc
744 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v10, v5
745 ; CGP-NEXT: v_cndmask_b32_e32 v10, v13, v11, vcc
746 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v3, v4
747 ; CGP-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
748 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v4
749 ; CGP-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
750 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
751 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
752 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v2, v5
753 ; CGP-NEXT: v_cndmask_b32_e32 v2, v4, v3, vcc
754 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
755 ; CGP-NEXT: v_cndmask_b32_e32 v2, v14, v12, vcc
756 ; CGP-NEXT: v_cndmask_b32_e32 v3, v15, v16, vcc
757 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
758 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
759 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
760 ; CGP-NEXT: ; implicit-def: $vgpr2
761 ; CGP-NEXT: ; implicit-def: $vgpr4
762 ; CGP-NEXT: ; implicit-def: $vgpr10
763 ; CGP-NEXT: .LBB2_2: ; %Flow1
764 ; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
765 ; CGP-NEXT: s_cbranch_execz .LBB2_4
767 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v2
768 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v4
769 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
770 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
771 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
772 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
773 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
774 ; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
775 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
776 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
777 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
778 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
779 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
780 ; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4
781 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
782 ; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
783 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
784 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
785 ; CGP-NEXT: v_mov_b32_e32 v1, 0
787 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
788 ; CGP-NEXT: v_or_b32_e32 v3, v9, v7
789 ; CGP-NEXT: v_mov_b32_e32 v2, 0
790 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
791 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v6
792 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
793 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
794 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
795 ; CGP-NEXT: s_cbranch_execnz .LBB2_7
796 ; CGP-NEXT: ; %bb.5: ; %Flow
797 ; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
798 ; CGP-NEXT: s_cbranch_execnz .LBB2_8
800 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
801 ; CGP-NEXT: s_setpc_b64 s[30:31]
803 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v7
804 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
805 ; CGP-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc
806 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2
807 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
808 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
809 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2
810 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
811 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
812 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
813 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
814 ; CGP-NEXT: v_mul_lo_u32 v10, v3, v4
815 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v2
816 ; CGP-NEXT: v_mul_lo_u32 v12, v5, v2
817 ; CGP-NEXT: v_mul_hi_u32 v13, v3, v2
818 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v12, v10
819 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v11
820 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
821 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v11
822 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v13
823 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v10
824 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v10
825 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v10
826 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10
827 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
828 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
829 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
830 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
831 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
832 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
833 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
834 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
835 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
836 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
837 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
838 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
839 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
840 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v12
841 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
842 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc
843 ; CGP-NEXT: v_mul_lo_u32 v10, v3, v2
844 ; CGP-NEXT: v_mul_lo_u32 v5, v5, v2
845 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v2
846 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
847 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v10
848 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v10
849 ; CGP-NEXT: v_mul_hi_u32 v10, v4, v10
850 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v5, v3
851 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
852 ; CGP-NEXT: v_mul_lo_u32 v5, v2, v3
853 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v3
854 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3
855 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3
856 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
857 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
858 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
859 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
860 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v5, v13
861 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
862 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v10, v14
863 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
864 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v12, v5
865 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
866 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v10, v5
867 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
868 ; CGP-NEXT: v_add_i32_e32 v10, vcc, v11, v10
869 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v10
870 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v5
871 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc
872 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v2
873 ; CGP-NEXT: v_mul_hi_u32 v5, v8, v2
874 ; CGP-NEXT: v_mul_hi_u32 v2, v9, v2
875 ; CGP-NEXT: v_mul_lo_u32 v10, v8, v3
876 ; CGP-NEXT: v_mul_lo_u32 v11, v9, v3
877 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v3
878 ; CGP-NEXT: v_mul_hi_u32 v3, v9, v3
879 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v10
880 ; CGP-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
881 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
882 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
883 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v5
884 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
885 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
886 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
887 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
888 ; CGP-NEXT: v_add_i32_e32 v5, vcc, v11, v5
889 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
890 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
891 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v5, v4
892 ; CGP-NEXT: v_mul_lo_u32 v5, v6, v2
893 ; CGP-NEXT: v_mul_lo_u32 v10, v7, v2
894 ; CGP-NEXT: v_mul_hi_u32 v11, v6, v2
895 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
896 ; CGP-NEXT: v_mul_lo_u32 v4, v6, v3
897 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2
898 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
899 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v10, v4
900 ; CGP-NEXT: v_add_i32_e32 v10, vcc, 1, v12
901 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc
902 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
903 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v8, v5
904 ; CGP-NEXT: v_subb_u32_e64 v8, s[4:5], v9, v4, vcc
905 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4
906 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v6
907 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
908 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v7
909 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
910 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v7, vcc
911 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v8, v7
912 ; CGP-NEXT: v_cndmask_b32_e32 v8, v11, v9, vcc
913 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
914 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
915 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v6
916 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
917 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v7
918 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
919 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v7
920 ; CGP-NEXT: v_cndmask_b32_e32 v4, v6, v5, vcc
921 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
922 ; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v10, vcc
923 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
924 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8
925 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
926 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
927 ; CGP-NEXT: ; implicit-def: $vgpr4
928 ; CGP-NEXT: ; implicit-def: $vgpr6
929 ; CGP-NEXT: ; implicit-def: $vgpr8
930 ; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
931 ; CGP-NEXT: s_cbranch_execz .LBB2_6
933 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
934 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v6
935 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
936 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
937 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
938 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
939 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
940 ; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
941 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v6
942 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
943 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
944 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
945 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
946 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6
947 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
948 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
949 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
950 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
951 ; CGP-NEXT: v_mov_b32_e32 v3, 0
952 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
953 ; CGP-NEXT: s_setpc_b64 s[30:31]
954 %result = udiv <2 x i64> %num, %den
955 ret <2 x i64> %result
; Unsigned i64 division by the constant 4096 (2^12).
; The assertions use the common CHECK prefix, so both RUN configurations are
; expected to produce the same output: the visible checks require a 64-bit
; logical right shift by 12 followed directly by the return.
958 define i64 @v_udiv_i64_pow2k_denom(i64 %num) {
959 ; CHECK-LABEL: v_udiv_i64_pow2k_denom:
961 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
963 ; CHECK-NEXT: s_setpc_b64 s[30:31]
964 %result = udiv i64 %num, 4096
; Unsigned <2 x i64> division by a splat of the constant 4096 (2^12).
; The assertions use the common CHECK prefix, so both RUN configurations are
; expected to produce the same output: one 64-bit logical right shift by 12
; per vector element (v[0:1] and v[2:3]), then the return.
968 define <2 x i64> @v_udiv_v2i64_pow2k_denom(<2 x i64> %num) {
969 ; CHECK-LABEL: v_udiv_v2i64_pow2k_denom:
971 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 12
973 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 12
974 ; CHECK-NEXT: s_setpc_b64 s[30:31]
975 %result = udiv <2 x i64> %num, <i64 4096, i64 4096>
976 ret <2 x i64> %result
; Unsigned i64 division by the odd constant 1235195.
; The assertions use the common CHECK prefix, so both RUN configurations are
; expected to produce the same output. The expected code is branch-free: it
; loads two 32-bit constants (0x1fb03c31 into v2, 0xd9528440 into v3), forms
; 32-bit partial products of the numerator halves (v0, v1) with them via
; v_mul_lo_u32 / v_mul_hi_u32, sums them while turning each carry-out into a
; 0/1 value with v_cndmask, and finishes with a 64-bit right shift by 20.
; NOTE(review): the two constants are presumably the low and high dwords of
; the 64-bit magic reciprocal for 1235195 - confirm against the lowering code.
979 define i64 @v_udiv_i64_oddk_denom(i64 %num) {
980 ; CHECK-LABEL: v_udiv_i64_oddk_denom:
982 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; CHECK-NEXT: v_mov_b32_e32 v2, 0x1fb03c31
984 ; CHECK-NEXT: v_mov_b32_e32 v3, 0xd9528440
985 ; CHECK-NEXT: v_mul_lo_u32 v4, v1, v2
986 ; CHECK-NEXT: v_mul_lo_u32 v5, v0, v3
987 ; CHECK-NEXT: v_mul_hi_u32 v6, v0, v2
988 ; CHECK-NEXT: v_mul_lo_u32 v7, v1, v3
989 ; CHECK-NEXT: v_mul_hi_u32 v2, v1, v2
990 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, v3
991 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v5
992 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
993 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
994 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
995 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v6
996 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
997 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v2, v0
998 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
999 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v5, v4
1000 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1001 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1002 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1003 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1004 ; CHECK-NEXT: v_mul_hi_u32 v1, v1, v3
1005 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1006 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
1007 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1008 %result = udiv i64 %num, 1235195
; Unsigned <2 x i64> division by a splat of the odd constant 1235195.
; The assertions use the common CHECK prefix, so both RUN configurations are
; expected to produce the same output. The expected code is branch-free: the
; constants 0x1fb03c31 (v4) and 0xd9528440 (v5) are loaded once and shared by
; both elements; the partial-product and carry sequences for element 0
; (v[0:1]) and element 1 (v[2:3]) are interleaved, and each element ends with
; a 64-bit right shift by 20.
; NOTE(review): the two constants are presumably the low and high dwords of
; the 64-bit magic reciprocal for 1235195 - confirm against the lowering code.
1012 define <2 x i64> @v_udiv_v2i64_oddk_denom(<2 x i64> %num) {
1013 ; CHECK-LABEL: v_udiv_v2i64_oddk_denom:
1015 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1016 ; CHECK-NEXT: v_mov_b32_e32 v4, 0x1fb03c31
1017 ; CHECK-NEXT: v_mov_b32_e32 v5, 0xd9528440
1018 ; CHECK-NEXT: v_mul_lo_u32 v6, v1, v4
1019 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v5
1020 ; CHECK-NEXT: v_mul_hi_u32 v8, v0, v4
1021 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v5
1022 ; CHECK-NEXT: v_mul_hi_u32 v10, v1, v4
1023 ; CHECK-NEXT: v_mul_hi_u32 v0, v0, v5
1024 ; CHECK-NEXT: v_mul_hi_u32 v1, v1, v5
1025 ; CHECK-NEXT: v_mul_lo_u32 v11, v3, v4
1026 ; CHECK-NEXT: v_mul_lo_u32 v12, v2, v5
1027 ; CHECK-NEXT: v_mul_hi_u32 v13, v2, v4
1028 ; CHECK-NEXT: v_mul_lo_u32 v14, v3, v5
1029 ; CHECK-NEXT: v_mul_hi_u32 v4, v3, v4
1030 ; CHECK-NEXT: v_mul_hi_u32 v2, v2, v5
1031 ; CHECK-NEXT: v_mul_hi_u32 v3, v3, v5
1032 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v7
1033 ; CHECK-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1034 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v10
1035 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1036 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v12
1037 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1038 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v14, v4
1039 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1040 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v5, v8
1041 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1042 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v7, v0
1043 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1044 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v13
1045 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1046 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v4, v2
1047 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1048 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
1049 ; CHECK-NEXT: v_add_i32_e32 v6, vcc, v9, v7
1050 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v11, v8
1051 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v12, v4
1052 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v5
1053 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, 1, vcc
1054 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1055 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1056 ; CHECK-NEXT: v_add_i32_e32 v5, vcc, v6, v5
1057 ; CHECK-NEXT: v_add_i32_e32 v4, vcc, v4, v7
1058 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v5
1059 ; CHECK-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1060 ; CHECK-NEXT: v_lshr_b64 v[0:1], v[0:1], 20
1061 ; CHECK-NEXT: v_lshr_b64 v[2:3], v[2:3], 20
1062 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1063 %result = udiv <2 x i64> %num, <i64 1235195, i64 1235195>
1064 ret <2 x i64> %result
; Unsigned i64 division whose divisor is 4096 shifted left by a run-time
; amount (%shl.y = shl i64 4096, %y).
; The assertions use the common CHECK prefix, so both RUN configurations are
; expected to produce the same output. The expected code materializes the
; divisor with v_lshl_b64 into v[5:6] and then performs a general division
; rather than a shift of the numerator:
;   * the high dwords of numerator (v4) and divisor (v6) are ORed and compared
;     against zero to split the lanes with exec masking;
;   * .LBB7_3 handles lanes where that OR is non-zero, using a float
;     reciprocal estimate of the 64-bit divisor refined with 32-bit
;     multiply/add-with-carry sequences, followed by compare/select
;     corrections of the quotient;
;   * .LBB7_4 handles the remaining lanes with a 32-bit reciprocal-based
;     sequence on the low dwords and writes 0 to the high result dword (v1).
1067 define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
1068 ; CHECK-LABEL: v_udiv_i64_pow2_shl_denom:
1070 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1071 ; CHECK-NEXT: v_mov_b32_e32 v3, v0
1072 ; CHECK-NEXT: v_mov_b32_e32 v4, v1
1073 ; CHECK-NEXT: v_mov_b32_e32 v0, 0x1000
1074 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1075 ; CHECK-NEXT: v_mov_b32_e32 v7, 0
1076 ; CHECK-NEXT: v_lshl_b64 v[5:6], v[0:1], v2
1077 ; CHECK-NEXT: v_or_b32_e32 v8, v4, v6
1078 ; CHECK-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[7:8]
1079 ; CHECK-NEXT: v_cvt_f32_u32_e32 v2, v5
1080 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
1081 ; CHECK-NEXT: s_and_saveexec_b64 s[4:5], vcc
1082 ; CHECK-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1083 ; CHECK-NEXT: s_cbranch_execnz .LBB7_3
1084 ; CHECK-NEXT: ; %bb.1: ; %Flow
1085 ; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
1086 ; CHECK-NEXT: s_cbranch_execnz .LBB7_4
1087 ; CHECK-NEXT: .LBB7_2:
1088 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
1089 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1090 ; CHECK-NEXT: .LBB7_3:
1091 ; CHECK-NEXT: v_cvt_f32_u32_e32 v0, v6
1092 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1093 ; CHECK-NEXT: v_subb_u32_e32 v7, vcc, 0, v6, vcc
1094 ; CHECK-NEXT: v_mac_f32_e32 v2, 0x4f800000, v0
1095 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
1096 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1097 ; CHECK-NEXT: v_mul_f32_e32 v2, 0x2f800000, v0
1098 ; CHECK-NEXT: v_trunc_f32_e32 v2, v2
1099 ; CHECK-NEXT: v_mac_f32_e32 v0, 0xcf800000, v2
1100 ; CHECK-NEXT: v_cvt_u32_f32_e32 v2, v2
1101 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1102 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v2
1103 ; CHECK-NEXT: v_mul_lo_u32 v9, v1, v0
1104 ; CHECK-NEXT: v_mul_lo_u32 v10, v7, v0
1105 ; CHECK-NEXT: v_mul_hi_u32 v11, v1, v0
1106 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v10, v8
1107 ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v9
1108 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v9
1109 ; CHECK-NEXT: v_mul_hi_u32 v9, v2, v9
1110 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v11
1111 ; CHECK-NEXT: v_mul_lo_u32 v11, v0, v8
1112 ; CHECK-NEXT: v_mul_lo_u32 v13, v2, v8
1113 ; CHECK-NEXT: v_mul_hi_u32 v14, v0, v8
1114 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8
1115 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v11
1116 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1117 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v13, v9
1118 ; CHECK-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1119 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1120 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1121 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v14
1122 ; CHECK-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1123 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1124 ; CHECK-NEXT: v_add_i32_e32 v11, vcc, v13, v12
1125 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v10
1126 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1127 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1128 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v10
1129 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v9
1130 ; CHECK-NEXT: v_addc_u32_e32 v2, vcc, v2, v8, vcc
1131 ; CHECK-NEXT: v_mul_lo_u32 v8, v1, v0
1132 ; CHECK-NEXT: v_mul_lo_u32 v7, v7, v0
1133 ; CHECK-NEXT: v_mul_hi_u32 v9, v1, v0
1134 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v2
1135 ; CHECK-NEXT: v_mul_lo_u32 v10, v2, v8
1136 ; CHECK-NEXT: v_mul_hi_u32 v11, v0, v8
1137 ; CHECK-NEXT: v_mul_hi_u32 v8, v2, v8
1138 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v7, v1
1139 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v9
1140 ; CHECK-NEXT: v_mul_lo_u32 v7, v0, v1
1141 ; CHECK-NEXT: v_mul_lo_u32 v9, v2, v1
1142 ; CHECK-NEXT: v_mul_hi_u32 v12, v0, v1
1143 ; CHECK-NEXT: v_mul_hi_u32 v1, v2, v1
1144 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1145 ; CHECK-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1146 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1147 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1148 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v7, v11
1149 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1150 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1151 ; CHECK-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1152 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v10, v7
1153 ; CHECK-NEXT: v_add_i32_e32 v9, vcc, v9, v11
1154 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v8, v7
1155 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1156 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, v9, v8
1157 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v8
1158 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v7
1159 ; CHECK-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
1160 ; CHECK-NEXT: v_mul_lo_u32 v2, v4, v0
1161 ; CHECK-NEXT: v_mul_hi_u32 v7, v3, v0
1162 ; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
1163 ; CHECK-NEXT: v_mul_lo_u32 v8, v3, v1
1164 ; CHECK-NEXT: v_mul_lo_u32 v9, v4, v1
1165 ; CHECK-NEXT: v_mul_hi_u32 v10, v3, v1
1166 ; CHECK-NEXT: v_mul_hi_u32 v1, v4, v1
1167 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v8
1168 ; CHECK-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1169 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v9, v0
1170 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1171 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v7
1172 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1173 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v10
1174 ; CHECK-NEXT: v_cndmask_b32_e64 v7, 0, 1, vcc
1175 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1176 ; CHECK-NEXT: v_add_i32_e32 v7, vcc, v9, v7
1177 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v2
1178 ; CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1179 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v7, v2
1180 ; CHECK-NEXT: v_mul_lo_u32 v7, v5, v0
1181 ; CHECK-NEXT: v_mul_lo_u32 v8, v6, v0
1182 ; CHECK-NEXT: v_mul_hi_u32 v9, v5, v0
1183 ; CHECK-NEXT: v_add_i32_e32 v1, vcc, v1, v2
1184 ; CHECK-NEXT: v_mul_lo_u32 v2, v5, v1
1185 ; CHECK-NEXT: v_add_i32_e32 v10, vcc, 1, v0
1186 ; CHECK-NEXT: v_addc_u32_e32 v11, vcc, 0, v1, vcc
1187 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1188 ; CHECK-NEXT: v_add_i32_e32 v8, vcc, 1, v10
1189 ; CHECK-NEXT: v_addc_u32_e32 v12, vcc, 0, v11, vcc
1190 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, v2, v9
1191 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v7
1192 ; CHECK-NEXT: v_subb_u32_e64 v7, s[4:5], v4, v2, vcc
1193 ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v4, v2
1194 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v5
1195 ; CHECK-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[4:5]
1196 ; CHECK-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v6
1197 ; CHECK-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1198 ; CHECK-NEXT: v_subb_u32_e32 v2, vcc, v2, v6, vcc
1199 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v7, v6
1200 ; CHECK-NEXT: v_cndmask_b32_e32 v4, v9, v4, vcc
1201 ; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v3, v5
1202 ; CHECK-NEXT: v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
1203 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v5
1204 ; CHECK-NEXT: v_cndmask_b32_e64 v3, 0, -1, vcc
1205 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v6
1206 ; CHECK-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1207 ; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v2, v6
1208 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v5, v3, vcc
1209 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1210 ; CHECK-NEXT: v_cndmask_b32_e32 v2, v10, v8, vcc
1211 ; CHECK-NEXT: v_cndmask_b32_e32 v3, v11, v12, vcc
1212 ; CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1213 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1214 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1215 ; CHECK-NEXT: ; implicit-def: $vgpr2
1216 ; CHECK-NEXT: ; implicit-def: $vgpr5_vgpr6
1217 ; CHECK-NEXT: ; implicit-def: $vgpr3
1218 ; CHECK-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
1219 ; CHECK-NEXT: s_cbranch_execz .LBB7_2
1220 ; CHECK-NEXT: .LBB7_4:
1221 ; CHECK-NEXT: v_rcp_iflag_f32_e32 v0, v2
1222 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, 0, v5
1223 ; CHECK-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1224 ; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v0
1225 ; CHECK-NEXT: v_mul_lo_u32 v1, v1, v0
1226 ; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
1227 ; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1228 ; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1229 ; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5
1230 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1231 ; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1232 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1233 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1234 ; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5
1235 ; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1236 ; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1237 ; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1238 ; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1239 ; CHECK-NEXT: v_mov_b32_e32 v1, 0
1240 ; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
1241 ; CHECK-NEXT: s_setpc_b64 s[30:31]
1242 %shl.y = shl i64 4096, %y
1243 %r = udiv i64 %x, %shl.y
1247 define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
1248 ; GISEL-LABEL: v_udiv_v2i64_pow2_shl_denom:
1250 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1251 ; GISEL-NEXT: v_mov_b32_e32 v9, 0x1000
1252 ; GISEL-NEXT: v_mov_b32_e32 v10, 0
1253 ; GISEL-NEXT: v_lshl_b64 v[7:8], v[9:10], v4
1254 ; GISEL-NEXT: v_lshl_b64 v[4:5], v[9:10], v6
1255 ; GISEL-NEXT: v_cvt_f32_u32_e32 v10, v7
1256 ; GISEL-NEXT: v_cvt_f32_u32_e32 v11, v8
1257 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, 0, v7
1258 ; GISEL-NEXT: v_cvt_f32_u32_e32 v13, v4
1259 ; GISEL-NEXT: v_cvt_f32_u32_e32 v14, v5
1260 ; GISEL-NEXT: v_sub_i32_e64 v9, s[4:5], 0, v4
1261 ; GISEL-NEXT: v_subb_u32_e32 v15, vcc, 0, v8, vcc
1262 ; GISEL-NEXT: v_subb_u32_e64 v12, vcc, 0, v5, s[4:5]
1263 ; GISEL-NEXT: v_mac_f32_e32 v10, 0x4f800000, v11
1264 ; GISEL-NEXT: v_mac_f32_e32 v13, 0x4f800000, v14
1265 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v10, v10
1266 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v11, v13
1267 ; GISEL-NEXT: v_mul_f32_e32 v10, 0x5f7ffffc, v10
1268 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x5f7ffffc, v11
1269 ; GISEL-NEXT: v_mul_f32_e32 v13, 0x2f800000, v10
1270 ; GISEL-NEXT: v_mul_f32_e32 v14, 0x2f800000, v11
1271 ; GISEL-NEXT: v_trunc_f32_e32 v13, v13
1272 ; GISEL-NEXT: v_trunc_f32_e32 v14, v14
1273 ; GISEL-NEXT: v_mac_f32_e32 v10, 0xcf800000, v13
1274 ; GISEL-NEXT: v_cvt_u32_f32_e32 v13, v13
1275 ; GISEL-NEXT: v_mac_f32_e32 v11, 0xcf800000, v14
1276 ; GISEL-NEXT: v_cvt_u32_f32_e32 v14, v14
1277 ; GISEL-NEXT: v_cvt_u32_f32_e32 v10, v10
1278 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v13
1279 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
1280 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v14
1281 ; GISEL-NEXT: v_mul_lo_u32 v18, v9, v11
1282 ; GISEL-NEXT: v_mul_lo_u32 v19, v12, v11
1283 ; GISEL-NEXT: v_mul_hi_u32 v20, v9, v11
1284 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v19, v17
1285 ; GISEL-NEXT: v_mul_lo_u32 v19, v14, v18
1286 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
1287 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v17
1288 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
1289 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v18
1290 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v19, v20
1291 ; GISEL-NEXT: v_mul_lo_u32 v19, v6, v10
1292 ; GISEL-NEXT: v_mul_lo_u32 v20, v15, v10
1293 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v20, v16
1294 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v10
1295 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
1296 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v19
1297 ; GISEL-NEXT: v_mul_lo_u32 v21, v10, v16
1298 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
1299 ; GISEL-NEXT: v_mul_hi_u32 v21, v10, v19
1300 ; GISEL-NEXT: v_add_i32_e64 v20, s[8:9], v20, v21
1301 ; GISEL-NEXT: v_mul_hi_u32 v19, v13, v19
1302 ; GISEL-NEXT: v_mul_hi_u32 v18, v14, v18
1303 ; GISEL-NEXT: v_mul_lo_u32 v20, v13, v16
1304 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
1305 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v17
1306 ; GISEL-NEXT: v_add_i32_e64 v18, s[12:13], v20, v18
1307 ; GISEL-NEXT: v_mul_hi_u32 v20, v10, v16
1308 ; GISEL-NEXT: v_add_i32_e64 v19, s[14:15], v19, v20
1309 ; GISEL-NEXT: v_mul_hi_u32 v20, v11, v17
1310 ; GISEL-NEXT: v_add_i32_e64 v18, s[16:17], v18, v20
1311 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
1312 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[8:9]
1313 ; GISEL-NEXT: v_add_i32_e64 v20, s[6:7], v20, v21
1314 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[10:11]
1315 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[14:15]
1316 ; GISEL-NEXT: v_add_i32_e64 v21, s[6:7], v21, v22
1317 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
1318 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[4:5]
1319 ; GISEL-NEXT: v_add_i32_e32 v22, vcc, v22, v23
1320 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, s[12:13]
1321 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, s[16:17]
1322 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, v23, v24
1323 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v19, v20
1324 ; GISEL-NEXT: v_add_i32_e64 v18, s[4:5], v18, v22
1325 ; GISEL-NEXT: v_add_i32_e64 v10, s[6:7], v10, v19
1326 ; GISEL-NEXT: v_mul_hi_u32 v16, v13, v16
1327 ; GISEL-NEXT: v_mul_hi_u32 v17, v14, v17
1328 ; GISEL-NEXT: v_add_i32_e64 v11, s[8:9], v11, v18
1329 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1330 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v21, v18
1331 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1332 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v23, v19
1333 ; GISEL-NEXT: v_mul_lo_u32 v20, v6, v10
1334 ; GISEL-NEXT: v_mul_lo_u32 v15, v15, v10
1335 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v18
1336 ; GISEL-NEXT: v_mul_hi_u32 v18, v6, v10
1337 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v19
1338 ; GISEL-NEXT: v_mul_lo_u32 v19, v9, v11
1339 ; GISEL-NEXT: v_mul_lo_u32 v12, v12, v11
1340 ; GISEL-NEXT: v_addc_u32_e64 v13, vcc, v13, v16, s[6:7]
1341 ; GISEL-NEXT: v_mul_hi_u32 v16, v9, v11
1342 ; GISEL-NEXT: v_addc_u32_e64 v14, vcc, v14, v17, s[8:9]
1343 ; GISEL-NEXT: v_mul_hi_u32 v17, v10, v20
1344 ; GISEL-NEXT: v_mul_lo_u32 v6, v6, v13
1345 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v15, v6
1346 ; GISEL-NEXT: v_mul_hi_u32 v15, v11, v19
1347 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v14
1348 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v12, v9
1349 ; GISEL-NEXT: v_mul_lo_u32 v12, v13, v20
1350 ; GISEL-NEXT: v_mul_hi_u32 v20, v13, v20
1351 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v18
1352 ; GISEL-NEXT: v_mul_lo_u32 v18, v14, v19
1353 ; GISEL-NEXT: v_mul_hi_u32 v19, v14, v19
1354 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v16
1355 ; GISEL-NEXT: v_mul_lo_u32 v16, v10, v6
1356 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1357 ; GISEL-NEXT: v_mul_lo_u32 v16, v13, v6
1358 ; GISEL-NEXT: v_add_i32_e64 v12, s[4:5], v12, v17
1359 ; GISEL-NEXT: v_mul_hi_u32 v12, v10, v6
1360 ; GISEL-NEXT: v_mul_hi_u32 v6, v13, v6
1361 ; GISEL-NEXT: v_mul_lo_u32 v17, v11, v9
1362 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], v16, v20
1363 ; GISEL-NEXT: v_mul_lo_u32 v20, v14, v9
1364 ; GISEL-NEXT: v_add_i32_e64 v17, s[8:9], v18, v17
1365 ; GISEL-NEXT: v_mul_hi_u32 v18, v11, v9
1366 ; GISEL-NEXT: v_mul_hi_u32 v9, v14, v9
1367 ; GISEL-NEXT: v_add_i32_e64 v19, s[10:11], v20, v19
1368 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1369 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v17, v15
1370 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, s[6:7]
1371 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1372 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v16, v12
1373 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[10:11]
1374 ; GISEL-NEXT: v_add_i32_e64 v18, s[8:9], v19, v18
1375 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1376 ; GISEL-NEXT: v_add_i32_e64 v19, s[4:5], v20, v19
1377 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[6:7]
1378 ; GISEL-NEXT: v_add_i32_e64 v15, s[4:5], v15, v20
1379 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1380 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v20
1381 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[8:9]
1382 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v16, v20
1383 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v19
1384 ; GISEL-NEXT: v_add_i32_e64 v17, s[4:5], v18, v17
1385 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1386 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1387 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
1388 ; GISEL-NEXT: v_add_i32_e64 v11, s[4:5], v11, v17
1389 ; GISEL-NEXT: v_add_i32_e64 v12, s[6:7], v15, v18
1390 ; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], v16, v19
1391 ; GISEL-NEXT: v_mul_lo_u32 v16, v1, v10
1392 ; GISEL-NEXT: v_mul_hi_u32 v17, v0, v10
1393 ; GISEL-NEXT: v_mul_hi_u32 v10, v1, v10
1394 ; GISEL-NEXT: v_mul_lo_u32 v18, v3, v11
1395 ; GISEL-NEXT: v_mul_hi_u32 v19, v2, v11
1396 ; GISEL-NEXT: v_mul_hi_u32 v11, v3, v11
1397 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v12
1398 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], v9, v15
1399 ; GISEL-NEXT: v_addc_u32_e32 v6, vcc, v13, v6, vcc
1400 ; GISEL-NEXT: v_addc_u32_e64 v9, vcc, v14, v9, s[4:5]
1401 ; GISEL-NEXT: v_mul_lo_u32 v12, v0, v6
1402 ; GISEL-NEXT: v_mul_lo_u32 v13, v1, v6
1403 ; GISEL-NEXT: v_mul_hi_u32 v14, v0, v6
1404 ; GISEL-NEXT: v_mul_hi_u32 v15, v1, v6
1405 ; GISEL-NEXT: v_mul_lo_u32 v6, v2, v9
1406 ; GISEL-NEXT: v_mul_lo_u32 v20, v3, v9
1407 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v16, v12
1408 ; GISEL-NEXT: v_mul_hi_u32 v16, v2, v9
1409 ; GISEL-NEXT: v_mul_hi_u32 v21, v3, v9
1410 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v13, v10
1411 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v18, v6
1412 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v20, v11
1413 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1414 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1415 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[6:7]
1416 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1417 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[8:9]
1418 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v9, v14
1419 ; GISEL-NEXT: v_add_i32_e64 v6, s[6:7], v6, v19
1420 ; GISEL-NEXT: v_add_i32_e64 v10, s[8:9], v10, v16
1421 ; GISEL-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1422 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
1423 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, s[6:7]
1424 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, s[8:9]
1425 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v11, v6
1426 ; GISEL-NEXT: v_add_i32_e32 v11, vcc, v13, v14
1427 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v16
1428 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v17
1429 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v9, v6
1430 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v10, v13
1431 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1432 ; GISEL-NEXT: v_cndmask_b32_e64 v13, 0, 1, s[4:5]
1433 ; GISEL-NEXT: v_mul_lo_u32 v14, v7, v6
1434 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v6
1435 ; GISEL-NEXT: v_mul_hi_u32 v17, v7, v6
1436 ; GISEL-NEXT: v_mul_lo_u32 v18, v4, v9
1437 ; GISEL-NEXT: v_mul_lo_u32 v19, v5, v9
1438 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
1439 ; GISEL-NEXT: v_mul_hi_u32 v11, v4, v9
1440 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1441 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v6
1442 ; GISEL-NEXT: v_sub_i32_e64 v0, s[4:5], v0, v14
1443 ; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 1, v9
1444 ; GISEL-NEXT: v_sub_i32_e64 v2, s[8:9], v2, v18
1445 ; GISEL-NEXT: v_add_i32_e64 v18, s[10:11], 1, v13
1446 ; GISEL-NEXT: v_add_i32_e64 v10, s[12:13], v15, v10
1447 ; GISEL-NEXT: v_add_i32_e64 v15, s[12:13], 1, v14
1448 ; GISEL-NEXT: v_add_i32_e64 v12, s[14:15], v21, v12
1449 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[14:15], v0, v7
1450 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[16:17], v2, v4
1451 ; GISEL-NEXT: v_sub_i32_e64 v0, s[18:19], v0, v7
1452 ; GISEL-NEXT: v_sub_i32_e64 v2, s[20:21], v2, v4
1453 ; GISEL-NEXT: v_mul_lo_u32 v20, v7, v10
1454 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[22:23], v0, v7
1455 ; GISEL-NEXT: v_addc_u32_e32 v0, vcc, 0, v10, vcc
1456 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
1457 ; GISEL-NEXT: v_mul_lo_u32 v2, v4, v12
1458 ; GISEL-NEXT: v_add_i32_e64 v4, s[24:25], v16, v20
1459 ; GISEL-NEXT: v_addc_u32_e64 v7, s[6:7], 0, v12, s[6:7]
1460 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], v19, v2
1461 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, -1, s[14:15]
1462 ; GISEL-NEXT: v_add_i32_e64 v4, s[6:7], v4, v17
1463 ; GISEL-NEXT: v_subb_u32_e64 v17, s[6:7], v1, v4, s[4:5]
1464 ; GISEL-NEXT: v_sub_i32_e64 v1, s[6:7], v1, v4
1465 ; GISEL-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[16:17]
1466 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v17, v8
1467 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[14:15], v17, v8
1468 ; GISEL-NEXT: v_addc_u32_e64 v17, s[10:11], 0, v0, s[10:11]
1469 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], v1, v8, s[4:5]
1470 ; GISEL-NEXT: v_subbrev_u32_e64 v1, s[4:5], 0, v1, s[18:19]
1471 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v1, v8
1472 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[10:11], v1, v8
1473 ; GISEL-NEXT: v_addc_u32_e64 v1, s[12:13], 0, v7, s[12:13]
1474 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[22:23]
1475 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, -1, vcc
1476 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v11
1477 ; GISEL-NEXT: v_subb_u32_e64 v11, vcc, v3, v2, s[8:9]
1478 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v3, v2
1479 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v11, v5
1480 ; GISEL-NEXT: v_subb_u32_e64 v2, s[8:9], v2, v5, s[8:9]
1481 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[8:9], v11, v5
1482 ; GISEL-NEXT: v_cndmask_b32_e64 v3, 0, -1, s[6:7]
1483 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc
1484 ; GISEL-NEXT: v_subbrev_u32_e64 v2, vcc, 0, v2, s[20:21]
1485 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v3, v16, s[14:15]
1486 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v11, v4, s[8:9]
1487 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1488 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], v2, v5
1489 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, -1, s[4:5]
1490 ; GISEL-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1491 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1492 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4
1493 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[10:11]
1494 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v19, s[6:7]
1495 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v2
1496 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[8:9], 0, v3
1497 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v13, v18, s[6:7]
1498 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v14, v15, s[8:9]
1499 ; GISEL-NEXT: v_cndmask_b32_e64 v4, v0, v17, s[6:7]
1500 ; GISEL-NEXT: v_cndmask_b32_e64 v5, v7, v1, s[8:9]
1501 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc
1502 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[4:5]
1503 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v4, vcc
1504 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v12, v5, s[4:5]
1505 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1507 ; CGP-LABEL: v_udiv_v2i64_pow2_shl_denom:
1509 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1510 ; CGP-NEXT: v_mov_b32_e32 v8, v0
1511 ; CGP-NEXT: v_mov_b32_e32 v9, v1
1512 ; CGP-NEXT: v_mov_b32_e32 v5, v2
1513 ; CGP-NEXT: v_mov_b32_e32 v7, v3
1514 ; CGP-NEXT: v_mov_b32_e32 v10, 0x1000
1515 ; CGP-NEXT: v_mov_b32_e32 v11, 0
1516 ; CGP-NEXT: v_mov_b32_e32 v0, 0
1517 ; CGP-NEXT: v_lshl_b64 v[2:3], v[10:11], v4
1518 ; CGP-NEXT: v_or_b32_e32 v1, v9, v3
1519 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1520 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v2
1521 ; CGP-NEXT: ; implicit-def: $vgpr0_vgpr1
1522 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
1523 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1524 ; CGP-NEXT: s_cbranch_execz .LBB8_2
1525 ; CGP-NEXT: ; %bb.1:
1526 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v3
1527 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
1528 ; CGP-NEXT: v_subb_u32_e32 v12, vcc, 0, v3, vcc
1529 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v0
1530 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4
1531 ; CGP-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
1532 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v0
1533 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
1534 ; CGP-NEXT: v_mac_f32_e32 v0, 0xcf800000, v4
1535 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
1536 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
1537 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v4
1538 ; CGP-NEXT: v_mul_lo_u32 v14, v1, v0
1539 ; CGP-NEXT: v_mul_lo_u32 v15, v12, v0
1540 ; CGP-NEXT: v_mul_hi_u32 v16, v1, v0
1541 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v13
1542 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v14
1543 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v14
1544 ; CGP-NEXT: v_mul_hi_u32 v14, v4, v14
1545 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v16
1546 ; CGP-NEXT: v_mul_lo_u32 v16, v0, v13
1547 ; CGP-NEXT: v_mul_lo_u32 v18, v4, v13
1548 ; CGP-NEXT: v_mul_hi_u32 v19, v0, v13
1549 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13
1550 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1551 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1552 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v18, v14
1553 ; CGP-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1554 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v15, v17
1555 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1556 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v19
1557 ; CGP-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
1558 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
1559 ; CGP-NEXT: v_add_i32_e32 v16, vcc, v18, v17
1560 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1561 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1562 ; CGP-NEXT: v_add_i32_e32 v15, vcc, v16, v15
1563 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v15
1564 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v14
1565 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v13, vcc
1566 ; CGP-NEXT: v_mul_lo_u32 v13, v1, v0
1567 ; CGP-NEXT: v_mul_lo_u32 v12, v12, v0
1568 ; CGP-NEXT: v_mul_hi_u32 v14, v1, v0
1569 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v4
1570 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v13
1571 ; CGP-NEXT: v_mul_hi_u32 v16, v0, v13
1572 ; CGP-NEXT: v_mul_hi_u32 v13, v4, v13
1573 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v12, v1
1574 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v14
1575 ; CGP-NEXT: v_mul_lo_u32 v12, v0, v1
1576 ; CGP-NEXT: v_mul_lo_u32 v14, v4, v1
1577 ; CGP-NEXT: v_mul_hi_u32 v17, v0, v1
1578 ; CGP-NEXT: v_mul_hi_u32 v1, v4, v1
1579 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1580 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1581 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1582 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1583 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1584 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1585 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v13, v17
1586 ; CGP-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1587 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1588 ; CGP-NEXT: v_add_i32_e32 v14, vcc, v14, v16
1589 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1590 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1591 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v14, v13
1592 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v13
1593 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v12
1594 ; CGP-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
1595 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v0
1596 ; CGP-NEXT: v_mul_hi_u32 v12, v8, v0
1597 ; CGP-NEXT: v_mul_hi_u32 v0, v9, v0
1598 ; CGP-NEXT: v_mul_lo_u32 v13, v8, v1
1599 ; CGP-NEXT: v_mul_lo_u32 v14, v9, v1
1600 ; CGP-NEXT: v_mul_hi_u32 v15, v8, v1
1601 ; CGP-NEXT: v_mul_hi_u32 v1, v9, v1
1602 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v13
1603 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1604 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v14, v0
1605 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1606 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v12
1607 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1608 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v15
1609 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1610 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
1611 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v14, v12
1612 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v4
1613 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1614 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v12, v4
1615 ; CGP-NEXT: v_mul_lo_u32 v12, v2, v0
1616 ; CGP-NEXT: v_mul_lo_u32 v13, v3, v0
1617 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v0
1618 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v1, v4
1619 ; CGP-NEXT: v_mul_lo_u32 v4, v2, v1
1620 ; CGP-NEXT: v_add_i32_e32 v15, vcc, 1, v0
1621 ; CGP-NEXT: v_addc_u32_e32 v16, vcc, 0, v1, vcc
1622 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v13, v4
1623 ; CGP-NEXT: v_add_i32_e32 v13, vcc, 1, v15
1624 ; CGP-NEXT: v_addc_u32_e32 v17, vcc, 0, v16, vcc
1625 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v14
1626 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v12
1627 ; CGP-NEXT: v_subb_u32_e64 v12, s[4:5], v9, v4, vcc
1628 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v9, v4
1629 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2
1630 ; CGP-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5]
1631 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v12, v3
1632 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
1633 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v3, vcc
1634 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v12, v3
1635 ; CGP-NEXT: v_cndmask_b32_e32 v9, v14, v9, vcc
1636 ; CGP-NEXT: v_sub_i32_e32 v8, vcc, v8, v2
1637 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1638 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v8, v2
1639 ; CGP-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc
1640 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v3
1641 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc
1642 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v3
1643 ; CGP-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
1644 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
1645 ; CGP-NEXT: v_cndmask_b32_e32 v2, v15, v13, vcc
1646 ; CGP-NEXT: v_cndmask_b32_e32 v3, v16, v17, vcc
1647 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
1648 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1649 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1650 ; CGP-NEXT: ; implicit-def: $vgpr4
1651 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
1652 ; CGP-NEXT: ; implicit-def: $vgpr8
1653 ; CGP-NEXT: .LBB8_2: ; %Flow1
1654 ; CGP-NEXT: s_or_saveexec_b64 s[6:7], s[6:7]
1655 ; CGP-NEXT: v_lshl_b64 v[9:10], v[10:11], v6
1656 ; CGP-NEXT: s_xor_b64 exec, exec, s[6:7]
1657 ; CGP-NEXT: s_cbranch_execz .LBB8_4
1658 ; CGP-NEXT: ; %bb.3:
1659 ; CGP-NEXT: v_rcp_iflag_f32_e32 v0, v4
1660 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, 0, v2
1661 ; CGP-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0
1662 ; CGP-NEXT: v_cvt_u32_f32_e32 v0, v0
1663 ; CGP-NEXT: v_mul_lo_u32 v1, v1, v0
1664 ; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
1665 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
1666 ; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
1667 ; CGP-NEXT: v_mul_lo_u32 v1, v0, v2
1668 ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1669 ; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
1670 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
1671 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1672 ; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
1673 ; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
1674 ; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1675 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
1676 ; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1677 ; CGP-NEXT: v_mov_b32_e32 v1, 0
1678 ; CGP-NEXT: .LBB8_4:
1679 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
1680 ; CGP-NEXT: v_or_b32_e32 v3, v7, v10
1681 ; CGP-NEXT: v_mov_b32_e32 v2, 0
1682 ; CGP-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1683 ; CGP-NEXT: v_cvt_f32_u32_e32 v4, v9
1684 ; CGP-NEXT: ; implicit-def: $vgpr2_vgpr3
1685 ; CGP-NEXT: s_and_saveexec_b64 s[4:5], vcc
1686 ; CGP-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
1687 ; CGP-NEXT: s_cbranch_execnz .LBB8_7
1688 ; CGP-NEXT: ; %bb.5: ; %Flow
1689 ; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
1690 ; CGP-NEXT: s_cbranch_execnz .LBB8_8
1691 ; CGP-NEXT: .LBB8_6:
1692 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
1693 ; CGP-NEXT: s_setpc_b64 s[30:31]
1694 ; CGP-NEXT: .LBB8_7:
1695 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v10
1696 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
1697 ; CGP-NEXT: v_subb_u32_e32 v6, vcc, 0, v10, vcc
1698 ; CGP-NEXT: v_mac_f32_e32 v4, 0x4f800000, v2
1699 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
1700 ; CGP-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
1701 ; CGP-NEXT: v_mul_f32_e32 v4, 0x2f800000, v2
1702 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
1703 ; CGP-NEXT: v_mac_f32_e32 v2, 0xcf800000, v4
1704 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
1705 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1706 ; CGP-NEXT: v_mul_lo_u32 v8, v3, v4
1707 ; CGP-NEXT: v_mul_lo_u32 v11, v3, v2
1708 ; CGP-NEXT: v_mul_lo_u32 v12, v6, v2
1709 ; CGP-NEXT: v_mul_hi_u32 v13, v3, v2
1710 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v12, v8
1711 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v11
1712 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v11
1713 ; CGP-NEXT: v_mul_hi_u32 v11, v4, v11
1714 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v13
1715 ; CGP-NEXT: v_mul_lo_u32 v13, v2, v8
1716 ; CGP-NEXT: v_mul_lo_u32 v15, v4, v8
1717 ; CGP-NEXT: v_mul_hi_u32 v16, v2, v8
1718 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8
1719 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v13
1720 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1721 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v15, v11
1722 ; CGP-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1723 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v12, v14
1724 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1725 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v16
1726 ; CGP-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
1727 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1728 ; CGP-NEXT: v_add_i32_e32 v13, vcc, v15, v14
1729 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1730 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1731 ; CGP-NEXT: v_add_i32_e32 v12, vcc, v13, v12
1732 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v12
1733 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v11
1734 ; CGP-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc
1735 ; CGP-NEXT: v_mul_lo_u32 v8, v3, v2
1736 ; CGP-NEXT: v_mul_lo_u32 v6, v6, v2
1737 ; CGP-NEXT: v_mul_hi_u32 v11, v3, v2
1738 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v4
1739 ; CGP-NEXT: v_mul_lo_u32 v12, v4, v8
1740 ; CGP-NEXT: v_mul_hi_u32 v13, v2, v8
1741 ; CGP-NEXT: v_mul_hi_u32 v8, v4, v8
1742 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v6, v3
1743 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v11
1744 ; CGP-NEXT: v_mul_lo_u32 v6, v2, v3
1745 ; CGP-NEXT: v_mul_lo_u32 v11, v4, v3
1746 ; CGP-NEXT: v_mul_hi_u32 v14, v2, v3
1747 ; CGP-NEXT: v_mul_hi_u32 v3, v4, v3
1748 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
1749 ; CGP-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
1750 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
1751 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1752 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v6, v13
1753 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1754 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v8, v14
1755 ; CGP-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
1756 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v12, v6
1757 ; CGP-NEXT: v_add_i32_e32 v11, vcc, v11, v13
1758 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v8, v6
1759 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1760 ; CGP-NEXT: v_add_i32_e32 v8, vcc, v11, v8
1761 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v8
1762 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v6
1763 ; CGP-NEXT: v_addc_u32_e32 v3, vcc, v4, v3, vcc
1764 ; CGP-NEXT: v_mul_lo_u32 v4, v7, v2
1765 ; CGP-NEXT: v_mul_hi_u32 v6, v5, v2
1766 ; CGP-NEXT: v_mul_hi_u32 v2, v7, v2
1767 ; CGP-NEXT: v_mul_lo_u32 v8, v5, v3
1768 ; CGP-NEXT: v_mul_lo_u32 v11, v7, v3
1769 ; CGP-NEXT: v_mul_hi_u32 v12, v5, v3
1770 ; CGP-NEXT: v_mul_hi_u32 v3, v7, v3
1771 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v8
1772 ; CGP-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1773 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v11, v2
1774 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1775 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v6
1776 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1777 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v12
1778 ; CGP-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc
1779 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
1780 ; CGP-NEXT: v_add_i32_e32 v6, vcc, v11, v6
1781 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v4
1782 ; CGP-NEXT: v_cndmask_b32_e64 v4, 0, 1, vcc
1783 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v6, v4
1784 ; CGP-NEXT: v_mul_lo_u32 v6, v9, v2
1785 ; CGP-NEXT: v_mul_lo_u32 v8, v10, v2
1786 ; CGP-NEXT: v_mul_hi_u32 v11, v9, v2
1787 ; CGP-NEXT: v_add_i32_e32 v3, vcc, v3, v4
1788 ; CGP-NEXT: v_mul_lo_u32 v4, v9, v3
1789 ; CGP-NEXT: v_add_i32_e32 v12, vcc, 1, v2
1790 ; CGP-NEXT: v_addc_u32_e32 v13, vcc, 0, v3, vcc
1791 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v8, v4
1792 ; CGP-NEXT: v_add_i32_e32 v8, vcc, 1, v12
1793 ; CGP-NEXT: v_addc_u32_e32 v14, vcc, 0, v13, vcc
1794 ; CGP-NEXT: v_add_i32_e32 v4, vcc, v4, v11
1795 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v6
1796 ; CGP-NEXT: v_subb_u32_e64 v6, s[4:5], v7, v4, vcc
1797 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v7, v4
1798 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v5, v9
1799 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5]
1800 ; CGP-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10
1801 ; CGP-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[4:5]
1802 ; CGP-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc
1803 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v6, v10
1804 ; CGP-NEXT: v_cndmask_b32_e32 v6, v11, v7, vcc
1805 ; CGP-NEXT: v_sub_i32_e32 v5, vcc, v5, v9
1806 ; CGP-NEXT: v_subbrev_u32_e32 v4, vcc, 0, v4, vcc
1807 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v5, v9
1808 ; CGP-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc
1809 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v10
1810 ; CGP-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc
1811 ; CGP-NEXT: v_cmp_eq_u32_e32 vcc, v4, v10
1812 ; CGP-NEXT: v_cndmask_b32_e32 v4, v7, v5, vcc
1813 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
1814 ; CGP-NEXT: v_cndmask_b32_e32 v4, v12, v8, vcc
1815 ; CGP-NEXT: v_cndmask_b32_e32 v5, v13, v14, vcc
1816 ; CGP-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
1817 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1818 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
1819 ; CGP-NEXT: ; implicit-def: $vgpr4
1820 ; CGP-NEXT: ; implicit-def: $vgpr9_vgpr10
1821 ; CGP-NEXT: ; implicit-def: $vgpr5
1822 ; CGP-NEXT: s_andn2_saveexec_b64 s[6:7], s[6:7]
1823 ; CGP-NEXT: s_cbranch_execz .LBB8_6
1824 ; CGP-NEXT: .LBB8_8:
1825 ; CGP-NEXT: v_rcp_iflag_f32_e32 v2, v4
1826 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, 0, v9
1827 ; CGP-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1828 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1829 ; CGP-NEXT: v_mul_lo_u32 v3, v3, v2
1830 ; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
1831 ; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1832 ; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
1833 ; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
1834 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1835 ; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
1836 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
1837 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1838 ; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
1839 ; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
1840 ; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1841 ; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
1842 ; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1843 ; CGP-NEXT: v_mov_b32_e32 v3, 0
1844 ; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
1845 ; CGP-NEXT: s_setpc_b64 s[30:31]
1846 %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
1847 %r = udiv <2 x i64> %x, %shl.y
; Scalar i64 unsigned division where both operands are first masked to their
; low 24 bits (and with 16777215 == 0xffffff) before the udiv. The expected
; code differs between the two RUN configurations at the top of the file:
; the GISEL checks belong to the run with
; -amdgpu-codegenprepare-disable-idiv-expansion=1 and the CGP checks to the
; run with -amdgpu-codegenprepare-disable-idiv-expansion=0.
; The check lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the script rather than editing them by hand.
1851 define i64 @v_udiv_i64_24bit(i64 %num, i64 %den) {
; Expected output for the GISEL run: only the low dwords (v0, v2) are used
; after masking. The sequence converts the masked denominator to float, takes
; v_rcp_iflag_f32, scales by 0x4f7ffffe, converts back to an integer and
; refines it with mul_lo/mul_hi/add. The quotient estimate comes from
; v_mul_hi_u32, followed by two compare-and-select steps that each may add 1
; to the quotient. The high dword of the result (v1) is set to 0.
1852 ; GISEL-LABEL: v_udiv_i64_24bit:
1854 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1855 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1856 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v2
1857 ; GISEL-NEXT: v_cvt_f32_u32_e32 v2, v1
1858 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, 0, v1
1859 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v2, v2
1860 ; GISEL-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
1861 ; GISEL-NEXT: v_cvt_u32_f32_e32 v2, v2
1862 ; GISEL-NEXT: v_mul_lo_u32 v3, v3, v2
1863 ; GISEL-NEXT: v_mul_hi_u32 v3, v2, v3
1864 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v3
1865 ; GISEL-NEXT: v_mul_hi_u32 v2, v0, v2
1866 ; GISEL-NEXT: v_mul_lo_u32 v3, v2, v1
1867 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v2
1868 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
1869 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
1870 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
1871 ; GISEL-NEXT: v_sub_i32_e64 v3, s[4:5], v0, v1
1872 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
1873 ; GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v2
1874 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1
1875 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc
1876 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
1877 ; GISEL-NEXT: s_setpc_b64 s[30:31]
; Expected output for the CGP run: both masked operands are converted to
; float, the quotient is formed as trunc(num * rcp(den)), and v_mad_f32
; computes num - quotient * den. The |remainder| >= den compare yields a 0/1
; value that is added to the integer quotient, which is then masked with
; 0xffffff. The high dword of the result (v1) is set to 0.
1879 ; CGP-LABEL: v_udiv_i64_24bit:
1881 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1882 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1883 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v2
1884 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
1885 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
1886 ; CGP-NEXT: v_rcp_f32_e32 v2, v1
1887 ; CGP-NEXT: v_mul_f32_e32 v2, v0, v2
1888 ; CGP-NEXT: v_trunc_f32_e32 v2, v2
1889 ; CGP-NEXT: v_mad_f32 v0, -v2, v1, v0
1890 ; CGP-NEXT: v_cvt_u32_f32_e32 v2, v2
1891 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v1
1892 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1893 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v2, v0
1894 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
1895 ; CGP-NEXT: v_mov_b32_e32 v1, 0
1896 ; CGP-NEXT: s_setpc_b64 s[30:31]
; IR under test: keep only the low 24 bits of each operand, then divide.
1897 %num.mask = and i64 %num, 16777215
1898 %den.mask = and i64 %den, 16777215
1899 %result = udiv i64 %num.mask, %den.mask
1903 define <2 x i64> @v_udiv_v2i64_24bit(<2 x i64> %num, <2 x i64> %den) {
1904 ; GISEL-LABEL: v_udiv_v2i64_24bit:
1906 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1907 ; GISEL-NEXT: v_and_b32_e32 v3, 0xffffff, v0
1908 ; GISEL-NEXT: v_and_b32_e32 v2, 0xffffff, v2
1909 ; GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v4
1910 ; GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v6
1911 ; GISEL-NEXT: v_cvt_f32_ubyte0_e32 v6, 0
1912 ; GISEL-NEXT: v_cvt_f32_u32_e32 v7, v1
1913 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, 0, v1
1914 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], 0, 0, vcc
1915 ; GISEL-NEXT: v_cvt_f32_u32_e32 v8, v0
1916 ; GISEL-NEXT: v_sub_i32_e32 v9, vcc, 0, v0
1917 ; GISEL-NEXT: v_subb_u32_e64 v10, s[4:5], 0, 0, vcc
1918 ; GISEL-NEXT: v_mac_f32_e32 v7, 0x4f800000, v6
1919 ; GISEL-NEXT: v_mac_f32_e32 v8, 0x4f800000, v6
1920 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v6, v7
1921 ; GISEL-NEXT: v_rcp_iflag_f32_e32 v7, v8
1922 ; GISEL-NEXT: v_mul_f32_e32 v6, 0x5f7ffffc, v6
1923 ; GISEL-NEXT: v_mul_f32_e32 v7, 0x5f7ffffc, v7
1924 ; GISEL-NEXT: v_mul_f32_e32 v8, 0x2f800000, v6
1925 ; GISEL-NEXT: v_mul_f32_e32 v11, 0x2f800000, v7
1926 ; GISEL-NEXT: v_trunc_f32_e32 v8, v8
1927 ; GISEL-NEXT: v_trunc_f32_e32 v11, v11
1928 ; GISEL-NEXT: v_mac_f32_e32 v6, 0xcf800000, v8
1929 ; GISEL-NEXT: v_cvt_u32_f32_e32 v8, v8
1930 ; GISEL-NEXT: v_mac_f32_e32 v7, 0xcf800000, v11
1931 ; GISEL-NEXT: v_cvt_u32_f32_e32 v11, v11
1932 ; GISEL-NEXT: v_cvt_u32_f32_e32 v6, v6
1933 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v8
1934 ; GISEL-NEXT: v_cvt_u32_f32_e32 v7, v7
1935 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v11
1936 ; GISEL-NEXT: v_mul_lo_u32 v14, v4, v6
1937 ; GISEL-NEXT: v_mul_lo_u32 v15, v5, v6
1938 ; GISEL-NEXT: v_mul_hi_u32 v16, v4, v6
1939 ; GISEL-NEXT: v_mul_lo_u32 v17, v9, v7
1940 ; GISEL-NEXT: v_mul_lo_u32 v18, v10, v7
1941 ; GISEL-NEXT: v_mul_hi_u32 v19, v9, v7
1942 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v12
1943 ; GISEL-NEXT: v_mul_lo_u32 v15, v8, v14
1944 ; GISEL-NEXT: v_mul_hi_u32 v20, v6, v14
1945 ; GISEL-NEXT: v_mul_hi_u32 v14, v8, v14
1946 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v18, v13
1947 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v17
1948 ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v17
1949 ; GISEL-NEXT: v_mul_hi_u32 v17, v11, v17
1950 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v16
1951 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v19
1952 ; GISEL-NEXT: v_mul_lo_u32 v16, v6, v12
1953 ; GISEL-NEXT: v_mul_lo_u32 v19, v8, v12
1954 ; GISEL-NEXT: v_mul_hi_u32 v22, v6, v12
1955 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
1956 ; GISEL-NEXT: v_mul_lo_u32 v23, v7, v13
1957 ; GISEL-NEXT: v_mul_lo_u32 v24, v11, v13
1958 ; GISEL-NEXT: v_mul_hi_u32 v25, v7, v13
1959 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
1960 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v16
1961 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
1962 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v19, v14
1963 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1964 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v23
1965 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
1966 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v24, v17
1967 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
1968 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v20
1969 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1970 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v22
1971 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
1972 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v18, v21
1973 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1974 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v25
1975 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1976 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
1977 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v20
1978 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, v23, v18
1979 ; GISEL-NEXT: v_add_i32_e32 v19, vcc, v24, v21
1980 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v15
1981 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
1982 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, v17, v18
1983 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1984 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v16, v15
1985 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, v19, v18
1986 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v15
1987 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v16
1988 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
1989 ; GISEL-NEXT: v_addc_u32_e32 v8, vcc, v8, v12, vcc
1990 ; GISEL-NEXT: v_mul_lo_u32 v12, v4, v6
1991 ; GISEL-NEXT: v_mul_lo_u32 v5, v5, v6
1992 ; GISEL-NEXT: v_mul_hi_u32 v14, v4, v6
1993 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
1994 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, v11, v13, vcc
1995 ; GISEL-NEXT: v_mul_lo_u32 v13, v9, v7
1996 ; GISEL-NEXT: v_mul_lo_u32 v10, v10, v7
1997 ; GISEL-NEXT: v_mul_hi_u32 v15, v9, v7
1998 ; GISEL-NEXT: v_mul_lo_u32 v4, v4, v8
1999 ; GISEL-NEXT: v_mul_lo_u32 v16, v8, v12
2000 ; GISEL-NEXT: v_mul_hi_u32 v17, v6, v12
2001 ; GISEL-NEXT: v_mul_hi_u32 v12, v8, v12
2002 ; GISEL-NEXT: v_mul_lo_u32 v9, v9, v11
2003 ; GISEL-NEXT: v_mul_lo_u32 v18, v11, v13
2004 ; GISEL-NEXT: v_mul_hi_u32 v19, v7, v13
2005 ; GISEL-NEXT: v_mul_hi_u32 v13, v11, v13
2006 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
2007 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v10, v9
2008 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v14
2009 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v15
2010 ; GISEL-NEXT: v_mul_lo_u32 v9, v6, v4
2011 ; GISEL-NEXT: v_mul_lo_u32 v10, v8, v4
2012 ; GISEL-NEXT: v_mul_hi_u32 v14, v6, v4
2013 ; GISEL-NEXT: v_mul_hi_u32 v4, v8, v4
2014 ; GISEL-NEXT: v_mul_lo_u32 v15, v7, v5
2015 ; GISEL-NEXT: v_mul_lo_u32 v20, v11, v5
2016 ; GISEL-NEXT: v_mul_hi_u32 v21, v7, v5
2017 ; GISEL-NEXT: v_mul_hi_u32 v5, v11, v5
2018 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
2019 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
2020 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v12
2021 ; GISEL-NEXT: v_cndmask_b32_e64 v12, 0, 1, vcc
2022 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v18, v15
2023 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2024 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v20, v13
2025 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2026 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v17
2027 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
2028 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v14
2029 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2030 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v15, v19
2031 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
2032 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v21
2033 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
2034 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v16, v9
2035 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v12, v14
2036 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v18, v15
2037 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, v20, v17
2038 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, v10, v9
2039 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2040 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v13, v14
2041 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, vcc
2042 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v12, v10
2043 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, v15, v14
2044 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v10
2045 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v12
2046 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v9
2047 ; GISEL-NEXT: v_addc_u32_e32 v4, vcc, v8, v4, vcc
2048 ; GISEL-NEXT: v_mul_lo_u32 v8, 0, v6
2049 ; GISEL-NEXT: v_mul_hi_u32 v9, v3, v6
2050 ; GISEL-NEXT: v_mul_hi_u32 v6, 0, v6
2051 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v13
2052 ; GISEL-NEXT: v_addc_u32_e32 v5, vcc, v11, v5, vcc
2053 ; GISEL-NEXT: v_mul_lo_u32 v10, 0, v7
2054 ; GISEL-NEXT: v_mul_hi_u32 v11, v2, v7
2055 ; GISEL-NEXT: v_mul_hi_u32 v7, 0, v7
2056 ; GISEL-NEXT: v_mul_lo_u32 v12, v3, v4
2057 ; GISEL-NEXT: v_mul_lo_u32 v13, 0, v4
2058 ; GISEL-NEXT: v_mul_hi_u32 v14, v3, v4
2059 ; GISEL-NEXT: v_mul_hi_u32 v4, 0, v4
2060 ; GISEL-NEXT: v_mul_lo_u32 v15, v2, v5
2061 ; GISEL-NEXT: v_mul_lo_u32 v16, 0, v5
2062 ; GISEL-NEXT: v_mul_hi_u32 v17, v2, v5
2063 ; GISEL-NEXT: v_mul_hi_u32 v5, 0, v5
2064 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v12
2065 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v13, v6
2066 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v15
2067 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v16, v7
2068 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
2069 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2070 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v14
2071 ; GISEL-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
2072 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v10, v11
2073 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2074 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v17
2075 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
2076 ; GISEL-NEXT: v_add_i32_e32 v6, vcc, v6, v8
2077 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
2078 ; GISEL-NEXT: v_add_i32_e32 v7, vcc, v7, v10
2079 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
2080 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v9, v8
2081 ; GISEL-NEXT: v_mul_lo_u32 v9, v1, v6
2082 ; GISEL-NEXT: v_mul_lo_u32 v12, 0, v6
2083 ; GISEL-NEXT: v_mul_hi_u32 v13, v1, v6
2084 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, v11, v10
2085 ; GISEL-NEXT: v_mul_lo_u32 v11, v0, v7
2086 ; GISEL-NEXT: v_mul_lo_u32 v14, 0, v7
2087 ; GISEL-NEXT: v_mul_hi_u32 v15, v0, v7
2088 ; GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v8
2089 ; GISEL-NEXT: v_add_i32_e32 v5, vcc, v5, v10
2090 ; GISEL-NEXT: v_mul_lo_u32 v8, v1, v4
2091 ; GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v6
2092 ; GISEL-NEXT: v_addc_u32_e32 v16, vcc, 0, v4, vcc
2093 ; GISEL-NEXT: v_mul_lo_u32 v17, v0, v5
2094 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 1, v7
2095 ; GISEL-NEXT: v_addc_u32_e32 v19, vcc, 0, v5, vcc
2096 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v12, v8
2097 ; GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v10
2098 ; GISEL-NEXT: v_addc_u32_e32 v20, vcc, 0, v16, vcc
2099 ; GISEL-NEXT: v_add_i32_e32 v14, vcc, v14, v17
2100 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, 1, v18
2101 ; GISEL-NEXT: v_addc_u32_e32 v21, vcc, 0, v19, vcc
2102 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v13
2103 ; GISEL-NEXT: v_add_i32_e32 v13, vcc, v14, v15
2104 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v9
2105 ; GISEL-NEXT: v_subb_u32_e64 v9, s[4:5], 0, v8, vcc
2106 ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 0, v8
2107 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[4:5], v3, v1
2108 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[4:5]
2109 ; GISEL-NEXT: v_sub_i32_e64 v2, s[4:5], v2, v11
2110 ; GISEL-NEXT: v_subb_u32_e64 v11, s[6:7], 0, v13, s[4:5]
2111 ; GISEL-NEXT: v_sub_i32_e64 v13, s[6:7], 0, v13
2112 ; GISEL-NEXT: v_cmp_ge_u32_e64 s[6:7], v2, v0
2113 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[6:7]
2114 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[6:7], 0, v9
2115 ; GISEL-NEXT: v_cndmask_b32_e64 v9, -1, v14, s[6:7]
2116 ; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
2117 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v11
2118 ; GISEL-NEXT: v_cndmask_b32_e32 v11, -1, v15, vcc
2119 ; GISEL-NEXT: v_subbrev_u32_e64 v13, vcc, 0, v13, s[4:5]
2120 ; GISEL-NEXT: v_sub_i32_e32 v3, vcc, v3, v1
2121 ; GISEL-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v8, vcc
2122 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v3, v1
2123 ; GISEL-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc
2124 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v2, v0
2125 ; GISEL-NEXT: v_subbrev_u32_e32 v3, vcc, 0, v13, vcc
2126 ; GISEL-NEXT: v_cmp_ge_u32_e32 vcc, v2, v0
2127 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc
2128 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
2129 ; GISEL-NEXT: v_cndmask_b32_e32 v1, -1, v1, vcc
2130 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
2131 ; GISEL-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
2132 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
2133 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v10, v12, vcc
2134 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0
2135 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v18, v17, s[4:5]
2136 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v16, v20, vcc
2137 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
2138 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v6, v1, vcc
2139 ; GISEL-NEXT: v_cndmask_b32_e64 v6, v19, v21, s[4:5]
2140 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v11
2141 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v7, v2, s[4:5]
2142 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc
2143 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v5, v6, s[4:5]
2144 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2146 ; CGP-LABEL: v_udiv_v2i64_24bit:
2148 ; CGP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2149 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2150 ; CGP-NEXT: v_and_b32_e32 v1, 0xffffff, v2
2151 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v4
2152 ; CGP-NEXT: v_and_b32_e32 v3, 0xffffff, v6
2153 ; CGP-NEXT: v_cvt_f32_u32_e32 v0, v0
2154 ; CGP-NEXT: v_cvt_f32_u32_e32 v2, v2
2155 ; CGP-NEXT: v_cvt_f32_u32_e32 v1, v1
2156 ; CGP-NEXT: v_cvt_f32_u32_e32 v3, v3
2157 ; CGP-NEXT: v_rcp_f32_e32 v4, v2
2158 ; CGP-NEXT: v_rcp_f32_e32 v5, v3
2159 ; CGP-NEXT: v_mul_f32_e32 v4, v0, v4
2160 ; CGP-NEXT: v_mul_f32_e32 v5, v1, v5
2161 ; CGP-NEXT: v_trunc_f32_e32 v4, v4
2162 ; CGP-NEXT: v_trunc_f32_e32 v5, v5
2163 ; CGP-NEXT: v_mad_f32 v0, -v4, v2, v0
2164 ; CGP-NEXT: v_cvt_u32_f32_e32 v4, v4
2165 ; CGP-NEXT: v_mad_f32 v1, -v5, v3, v1
2166 ; CGP-NEXT: v_cvt_u32_f32_e32 v5, v5
2167 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v0|, v2
2168 ; CGP-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
2169 ; CGP-NEXT: v_cmp_ge_f32_e64 s[4:5], |v1|, v3
2170 ; CGP-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5]
2171 ; CGP-NEXT: v_add_i32_e32 v0, vcc, v4, v0
2172 ; CGP-NEXT: v_add_i32_e32 v1, vcc, v5, v1
2173 ; CGP-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2174 ; CGP-NEXT: v_and_b32_e32 v2, 0xffffff, v1
2175 ; CGP-NEXT: v_mov_b32_e32 v1, 0
2176 ; CGP-NEXT: v_mov_b32_e32 v3, 0
2177 ; CGP-NEXT: s_setpc_b64 s[30:31]
2178 %num.mask = and <2 x i64> %num, <i64 16777215, i64 16777215>
2179 %den.mask = and <2 x i64> %den, <i64 16777215, i64 16777215>
2180 %result = udiv <2 x i64> %num.mask, %den.mask
2181 ret <2 x i64> %result