1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=SDAG %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -o - %s | FileCheck -check-prefix=GISEL %s
5 define <2 x i128> @v_sdiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
6 ; SDAG-LABEL: v_sdiv_v2i128_vv:
7 ; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
8 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
9 ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0
10 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
11 ; SDAG-NEXT: v_ashrrev_i32_e32 v24, 31, v3
12 ; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v11
13 ; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
14 ; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc
15 ; SDAG-NEXT: v_mov_b32_e32 v26, v24
16 ; SDAG-NEXT: v_mov_b32_e32 v27, v25
17 ; SDAG-NEXT: v_subb_u32_e32 v19, vcc, 0, v2, vcc
18 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
19 ; SDAG-NEXT: v_cndmask_b32_e64 v21, v1, v17, s[4:5]
20 ; SDAG-NEXT: v_cndmask_b32_e64 v20, v0, v16, s[4:5]
21 ; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v3, vcc
22 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v2, v19, s[4:5]
23 ; SDAG-NEXT: v_ffbh_u32_e32 v1, v20
24 ; SDAG-NEXT: v_ffbh_u32_e32 v2, v21
25 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v3, v0, s[4:5]
26 ; SDAG-NEXT: v_or_b32_e32 v0, v20, v16
27 ; SDAG-NEXT: v_sub_i32_e32 v3, vcc, 0, v8
28 ; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v1
29 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v16
30 ; SDAG-NEXT: v_or_b32_e32 v1, v21, v17
31 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc
32 ; SDAG-NEXT: v_min_u32_e32 v2, v19, v2
33 ; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], 32, v22
34 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v17
35 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[0:1]
36 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[6:7], 0, v[10:11]
37 ; SDAG-NEXT: v_cndmask_b32_e64 v28, v9, v23, s[6:7]
38 ; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v10, vcc
39 ; SDAG-NEXT: v_cndmask_b32_e64 v29, v8, v3, s[6:7]
40 ; SDAG-NEXT: v_min_u32_e32 v1, v19, v22
41 ; SDAG-NEXT: v_add_i32_e64 v2, s[8:9], 64, v2
42 ; SDAG-NEXT: v_addc_u32_e64 v3, s[8:9], 0, 0, s[8:9]
43 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, 0, v11, vcc
44 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v10, v0, s[6:7]
45 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
46 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v3, 0, vcc
47 ; SDAG-NEXT: v_cndmask_b32_e32 v10, v2, v1, vcc
48 ; SDAG-NEXT: v_ffbh_u32_e32 v3, v29
49 ; SDAG-NEXT: v_ffbh_u32_e32 v19, v28
50 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v11, v8, s[6:7]
51 ; SDAG-NEXT: v_or_b32_e32 v2, v29, v0
52 ; SDAG-NEXT: v_add_i32_e32 v8, vcc, 32, v3
53 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v0
54 ; SDAG-NEXT: v_or_b32_e32 v3, v28, v1
55 ; SDAG-NEXT: v_min_u32_e32 v8, v8, v19
56 ; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v11
57 ; SDAG-NEXT: v_ffbh_u32_e32 v19, v1
58 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
59 ; SDAG-NEXT: v_min_u32_e32 v2, v11, v19
60 ; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 64, v8
61 ; SDAG-NEXT: v_addc_u32_e64 v8, s[6:7], 0, 0, s[6:7]
62 ; SDAG-NEXT: v_cmp_ne_u64_e64 s[6:7], 0, v[0:1]
63 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, s[6:7]
64 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v3, v2, s[6:7]
65 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
66 ; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v10
67 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v8, v9, vcc
68 ; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v2
69 ; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v18, vcc
70 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[2:3]
71 ; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
72 ; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v18, vcc
73 ; SDAG-NEXT: v_or_b32_e32 v8, v8, v10
74 ; SDAG-NEXT: v_or_b32_e32 v9, v3, v11
75 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
76 ; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
77 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
78 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
79 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v18, v19, s[4:5]
80 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
81 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
82 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
83 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v17, 0, s[4:5]
84 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
85 ; SDAG-NEXT: v_cndmask_b32_e64 v22, v16, 0, s[4:5]
86 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v21, 0, s[4:5]
87 ; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc
88 ; SDAG-NEXT: v_cndmask_b32_e64 v23, v20, 0, s[4:5]
89 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
90 ; SDAG-NEXT: s_cbranch_execz .LBB0_6
91 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
92 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v2
93 ; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v2
94 ; SDAG-NEXT: v_mov_b32_e32 v8, 0
95 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
96 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v3, vcc
97 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[20:21], v18
98 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc
99 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
100 ; SDAG-NEXT: v_or_b32_e32 v10, v30, v32
101 ; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v2
102 ; SDAG-NEXT: v_or_b32_e32 v11, v31, v33
103 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], v34
104 ; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34
105 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[20:21], v34
106 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
107 ; SDAG-NEXT: v_lshr_b64 v[10:11], v[20:21], v35
108 ; SDAG-NEXT: v_or_b32_e32 v3, v3, v11
109 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v10
110 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
111 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v19, v3, s[4:5]
112 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v18, v2, s[4:5]
113 ; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, v23, s[4:5]
114 ; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, v22, s[4:5]
115 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34
116 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v17, s[4:5]
117 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v16, s[4:5]
118 ; SDAG-NEXT: v_mov_b32_e32 v10, 0
119 ; SDAG-NEXT: v_mov_b32_e32 v11, 0
120 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
121 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
122 ; SDAG-NEXT: s_cbranch_execz .LBB0_5
123 ; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
124 ; SDAG-NEXT: v_lshr_b64 v[8:9], v[20:21], v30
125 ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v30
126 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[16:17], v10
127 ; SDAG-NEXT: v_or_b32_e32 v11, v9, v11
128 ; SDAG-NEXT: v_or_b32_e32 v10, v8, v10
129 ; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
130 ; SDAG-NEXT: v_subrev_i32_e64 v8, s[4:5], 64, v30
131 ; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v8
132 ; SDAG-NEXT: v_cndmask_b32_e32 v9, v9, v11, vcc
133 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30
134 ; SDAG-NEXT: v_cndmask_b32_e64 v21, v9, v21, s[4:5]
135 ; SDAG-NEXT: v_cndmask_b32_e32 v8, v8, v10, vcc
136 ; SDAG-NEXT: v_cndmask_b32_e64 v20, v8, v20, s[4:5]
137 ; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v30
138 ; SDAG-NEXT: v_cndmask_b32_e32 v23, 0, v9, vcc
139 ; SDAG-NEXT: v_cndmask_b32_e32 v22, 0, v8, vcc
140 ; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
141 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc
142 ; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc
143 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc
144 ; SDAG-NEXT: s_mov_b64 s[4:5], 0
145 ; SDAG-NEXT: v_mov_b32_e32 v16, 0
146 ; SDAG-NEXT: v_mov_b32_e32 v17, 0
147 ; SDAG-NEXT: v_mov_b32_e32 v10, 0
148 ; SDAG-NEXT: v_mov_b32_e32 v11, 0
149 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
150 ; SDAG-NEXT: .LBB0_3: ; %udiv-do-while3
151 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
152 ; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v19
153 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
154 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
155 ; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v21
156 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
157 ; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v3
158 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
159 ; SDAG-NEXT: v_or_b32_e32 v19, v17, v19
160 ; SDAG-NEXT: v_or_b32_e32 v18, v16, v18
161 ; SDAG-NEXT: v_or_b32_e32 v16, v22, v38
162 ; SDAG-NEXT: v_or_b32_e32 v17, v20, v39
163 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v8
164 ; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v17
165 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v21, vcc
166 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v16, vcc
167 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v23, vcc
168 ; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
169 ; SDAG-NEXT: v_and_b32_e32 v20, v8, v29
170 ; SDAG-NEXT: v_and_b32_e32 v22, v8, v28
171 ; SDAG-NEXT: v_and_b32_e32 v38, v8, v0
172 ; SDAG-NEXT: v_and_b32_e32 v39, v8, v1
173 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
174 ; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v17, v20
175 ; SDAG-NEXT: v_subb_u32_e32 v21, vcc, v21, v22, vcc
176 ; SDAG-NEXT: v_subb_u32_e32 v22, vcc, v16, v38, vcc
177 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v23, v39, vcc
178 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30
179 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
180 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
181 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
182 ; SDAG-NEXT: v_or_b32_e32 v16, v30, v32
183 ; SDAG-NEXT: v_or_b32_e32 v17, v31, v33
184 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
185 ; SDAG-NEXT: v_or_b32_e32 v3, v11, v3
186 ; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
187 ; SDAG-NEXT: v_or_b32_e32 v2, v10, v2
188 ; SDAG-NEXT: v_mov_b32_e32 v17, v9
189 ; SDAG-NEXT: v_mov_b32_e32 v16, v8
190 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
191 ; SDAG-NEXT: s_cbranch_execnz .LBB0_3
192 ; SDAG-NEXT: ; %bb.4: ; %Flow13
193 ; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
194 ; SDAG-NEXT: .LBB0_5: ; %Flow14
195 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
196 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[2:3], 1
197 ; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v19
198 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[18:19], 1
199 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v16
200 ; SDAG-NEXT: v_or_b32_e32 v18, v11, v1
201 ; SDAG-NEXT: v_or_b32_e32 v19, v9, v3
202 ; SDAG-NEXT: v_or_b32_e32 v22, v10, v0
203 ; SDAG-NEXT: v_or_b32_e32 v23, v8, v2
204 ; SDAG-NEXT: .LBB0_6: ; %Flow16
205 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
206 ; SDAG-NEXT: v_ashrrev_i32_e32 v16, 31, v7
207 ; SDAG-NEXT: v_ashrrev_i32_e32 v17, 31, v15
208 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, 0, v4
209 ; SDAG-NEXT: v_mov_b32_e32 v8, 0
210 ; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
211 ; SDAG-NEXT: v_mov_b32_e32 v20, v16
212 ; SDAG-NEXT: v_mov_b32_e32 v21, v17
213 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v5, vcc
214 ; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v6, vcc
215 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
216 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v5, v1, s[4:5]
217 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v4, v0, s[4:5]
218 ; SDAG-NEXT: v_subb_u32_e32 v0, vcc, 0, v7, vcc
219 ; SDAG-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[4:5]
220 ; SDAG-NEXT: v_ffbh_u32_e32 v1, v2
221 ; SDAG-NEXT: v_ffbh_u32_e32 v4, v3
222 ; SDAG-NEXT: v_cndmask_b32_e64 v7, v7, v0, s[4:5]
223 ; SDAG-NEXT: v_sub_i32_e32 v5, vcc, 0, v12
224 ; SDAG-NEXT: v_or_b32_e32 v0, v2, v6
225 ; SDAG-NEXT: v_ffbh_u32_e32 v9, v6
226 ; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], 32, v1
227 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v13, vcc
228 ; SDAG-NEXT: v_or_b32_e32 v1, v3, v7
229 ; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 32, v9
230 ; SDAG-NEXT: v_ffbh_u32_e32 v30, v7
231 ; SDAG-NEXT: v_min_u32_e32 v4, v10, v4
232 ; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v14, vcc
233 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
234 ; SDAG-NEXT: v_cndmask_b32_e64 v28, v13, v11, s[4:5]
235 ; SDAG-NEXT: v_cndmask_b32_e64 v29, v12, v5, s[4:5]
236 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[0:1]
237 ; SDAG-NEXT: v_min_u32_e32 v1, v9, v30
238 ; SDAG-NEXT: v_add_i32_e64 v4, s[8:9], 64, v4
239 ; SDAG-NEXT: v_addc_u32_e64 v5, s[8:9], 0, 0, s[8:9]
240 ; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v15, vcc
241 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v14, v10, s[4:5]
242 ; SDAG-NEXT: v_ffbh_u32_e32 v10, v29
243 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v28
244 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
245 ; SDAG-NEXT: v_cndmask_b32_e64 v12, v5, 0, vcc
246 ; SDAG-NEXT: v_cndmask_b32_e32 v13, v4, v1, vcc
247 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v15, v9, s[4:5]
248 ; SDAG-NEXT: v_or_b32_e32 v4, v29, v0
249 ; SDAG-NEXT: v_ffbh_u32_e32 v9, v0
250 ; SDAG-NEXT: v_add_i32_e32 v10, vcc, 32, v10
251 ; SDAG-NEXT: v_or_b32_e32 v5, v28, v1
252 ; SDAG-NEXT: v_add_i32_e32 v9, vcc, 32, v9
253 ; SDAG-NEXT: v_ffbh_u32_e32 v14, v1
254 ; SDAG-NEXT: v_min_u32_e32 v10, v10, v11
255 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
256 ; SDAG-NEXT: v_min_u32_e32 v4, v9, v14
257 ; SDAG-NEXT: v_add_i32_e64 v5, s[4:5], 64, v10
258 ; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, s[4:5]
259 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
260 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
261 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v9, 0, vcc
262 ; SDAG-NEXT: v_cndmask_b32_e32 v4, v5, v4, vcc
263 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v13
264 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v9, v12, vcc
265 ; SDAG-NEXT: v_xor_b32_e32 v9, 0x7f, v4
266 ; SDAG-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v8, vcc
267 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[4:5]
268 ; SDAG-NEXT: v_cndmask_b32_e64 v12, 0, 1, s[4:5]
269 ; SDAG-NEXT: v_subbrev_u32_e32 v11, vcc, 0, v8, vcc
270 ; SDAG-NEXT: v_or_b32_e32 v8, v9, v10
271 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
272 ; SDAG-NEXT: v_cndmask_b32_e64 v13, 0, 1, vcc
273 ; SDAG-NEXT: v_or_b32_e32 v9, v5, v11
274 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
275 ; SDAG-NEXT: v_cndmask_b32_e32 v12, v13, v12, vcc
276 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
277 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v12
278 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
279 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
280 ; SDAG-NEXT: v_cndmask_b32_e64 v13, v7, 0, s[4:5]
281 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
282 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v6, 0, s[4:5]
283 ; SDAG-NEXT: v_cndmask_b32_e64 v14, v3, 0, s[4:5]
284 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v2, 0, s[4:5]
285 ; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
286 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
287 ; SDAG-NEXT: s_cbranch_execz .LBB0_12
288 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
289 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v4
290 ; SDAG-NEXT: v_sub_i32_e64 v12, s[4:5], 63, v4
291 ; SDAG-NEXT: v_mov_b32_e32 v8, 0
292 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
293 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v5, vcc
294 ; SDAG-NEXT: v_lshl_b64 v[12:13], v[2:3], v12
295 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v10, vcc
296 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
297 ; SDAG-NEXT: v_or_b32_e32 v10, v30, v32
298 ; SDAG-NEXT: v_sub_i32_e32 v34, vcc, 0x7f, v4
299 ; SDAG-NEXT: v_or_b32_e32 v11, v31, v33
300 ; SDAG-NEXT: v_lshl_b64 v[4:5], v[6:7], v34
301 ; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v34
302 ; SDAG-NEXT: v_lshl_b64 v[14:15], v[2:3], v34
303 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
304 ; SDAG-NEXT: v_lshr_b64 v[10:11], v[2:3], v35
305 ; SDAG-NEXT: v_or_b32_e32 v5, v5, v11
306 ; SDAG-NEXT: v_or_b32_e32 v4, v4, v10
307 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
308 ; SDAG-NEXT: v_cndmask_b32_e64 v5, v13, v5, s[4:5]
309 ; SDAG-NEXT: v_cndmask_b32_e64 v4, v12, v4, s[4:5]
310 ; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v15, s[4:5]
311 ; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v14, s[4:5]
312 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v34
313 ; SDAG-NEXT: v_cndmask_b32_e64 v5, v5, v7, s[4:5]
314 ; SDAG-NEXT: v_cndmask_b32_e64 v4, v4, v6, s[4:5]
315 ; SDAG-NEXT: v_mov_b32_e32 v12, 0
316 ; SDAG-NEXT: v_mov_b32_e32 v13, 0
317 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
318 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
319 ; SDAG-NEXT: s_cbranch_execz .LBB0_11
320 ; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
321 ; SDAG-NEXT: v_lshr_b64 v[8:9], v[2:3], v30
322 ; SDAG-NEXT: v_sub_i32_e32 v35, vcc, 64, v30
323 ; SDAG-NEXT: v_subrev_i32_e32 v36, vcc, 64, v30
324 ; SDAG-NEXT: v_lshr_b64 v[37:38], v[6:7], v30
325 ; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v29
326 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
327 ; SDAG-NEXT: v_mov_b32_e32 v14, 0
328 ; SDAG-NEXT: v_mov_b32_e32 v15, 0
329 ; SDAG-NEXT: v_mov_b32_e32 v12, 0
330 ; SDAG-NEXT: v_mov_b32_e32 v13, 0
331 ; SDAG-NEXT: v_lshl_b64 v[48:49], v[6:7], v35
332 ; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v36
333 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v28, vcc
334 ; SDAG-NEXT: v_or_b32_e32 v9, v9, v49
335 ; SDAG-NEXT: v_or_b32_e32 v8, v8, v48
336 ; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v0, vcc
337 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
338 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v7, v9, s[4:5]
339 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v6, v8, s[4:5]
340 ; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v38, s[4:5]
341 ; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v37, s[4:5]
342 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v1, vcc
343 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
344 ; SDAG-NEXT: v_cndmask_b32_e32 v3, v9, v3, vcc
345 ; SDAG-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc
346 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
347 ; SDAG-NEXT: .LBB0_9: ; %udiv-do-while
348 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
349 ; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
350 ; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v3
351 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
352 ; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v5
353 ; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1
354 ; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v11
355 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
356 ; SDAG-NEXT: v_or_b32_e32 v6, v6, v8
357 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v38
358 ; SDAG-NEXT: v_or_b32_e32 v4, v4, v39
359 ; SDAG-NEXT: v_or_b32_e32 v5, v13, v5
360 ; SDAG-NEXT: v_or_b32_e32 v11, v15, v11
361 ; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v34, v2
362 ; SDAG-NEXT: v_or_b32_e32 v4, v12, v4
363 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v35, v3, vcc
364 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v36, v6, vcc
365 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v7, vcc
366 ; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
367 ; SDAG-NEXT: v_and_b32_e32 v15, v8, v29
368 ; SDAG-NEXT: v_and_b32_e32 v38, v8, v28
369 ; SDAG-NEXT: v_and_b32_e32 v39, v8, v0
370 ; SDAG-NEXT: v_and_b32_e32 v48, v8, v1
371 ; SDAG-NEXT: v_sub_i32_e32 v2, vcc, v2, v15
372 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v38, vcc
373 ; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v39, vcc
374 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v48, vcc
375 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30
376 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
377 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
378 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
379 ; SDAG-NEXT: v_or_b32_e32 v38, v30, v32
380 ; SDAG-NEXT: v_or_b32_e32 v39, v31, v33
381 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[38:39]
382 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
383 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
384 ; SDAG-NEXT: v_or_b32_e32 v10, v14, v10
385 ; SDAG-NEXT: v_mov_b32_e32 v15, v9
386 ; SDAG-NEXT: v_mov_b32_e32 v14, v8
387 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
388 ; SDAG-NEXT: s_cbranch_execnz .LBB0_9
389 ; SDAG-NEXT: ; %bb.10: ; %Flow
390 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
391 ; SDAG-NEXT: .LBB0_11: ; %Flow11
392 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
393 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[4:5], 1
394 ; SDAG-NEXT: v_lshrrev_b32_e32 v4, 31, v11
395 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[10:11], 1
396 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v4
397 ; SDAG-NEXT: v_or_b32_e32 v13, v13, v1
398 ; SDAG-NEXT: v_or_b32_e32 v14, v9, v3
399 ; SDAG-NEXT: v_or_b32_e32 v9, v12, v0
400 ; SDAG-NEXT: v_or_b32_e32 v8, v8, v2
401 ; SDAG-NEXT: .LBB0_12: ; %Flow12
402 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
403 ; SDAG-NEXT: v_xor_b32_e32 v3, v27, v26
404 ; SDAG-NEXT: v_xor_b32_e32 v2, v25, v24
405 ; SDAG-NEXT: v_xor_b32_e32 v7, v21, v20
406 ; SDAG-NEXT: v_xor_b32_e32 v6, v17, v16
407 ; SDAG-NEXT: v_xor_b32_e32 v4, v18, v3
408 ; SDAG-NEXT: v_xor_b32_e32 v5, v22, v2
409 ; SDAG-NEXT: v_xor_b32_e32 v1, v19, v3
410 ; SDAG-NEXT: v_xor_b32_e32 v0, v23, v2
411 ; SDAG-NEXT: v_xor_b32_e32 v10, v13, v7
412 ; SDAG-NEXT: v_xor_b32_e32 v9, v9, v6
413 ; SDAG-NEXT: v_xor_b32_e32 v11, v14, v7
414 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
415 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
416 ; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v5, v2, vcc
417 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc
418 ; SDAG-NEXT: v_xor_b32_e32 v4, v8, v6
419 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v6
420 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v11, v7, vcc
421 ; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v9, v6, vcc
422 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v10, v7, vcc
423 ; SDAG-NEXT: s_setpc_b64 s[30:31]
425 ; GISEL-LABEL: v_sdiv_v2i128_vv:
426 ; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
427 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
428 ; GISEL-NEXT: v_ashrrev_i32_e32 v24, 31, v3
429 ; GISEL-NEXT: v_ashrrev_i32_e32 v25, 31, v11
430 ; GISEL-NEXT: v_mov_b32_e32 v16, 0x7f
431 ; GISEL-NEXT: v_mov_b32_e32 v17, 0
432 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
433 ; GISEL-NEXT: v_xor_b32_e32 v0, v24, v0
434 ; GISEL-NEXT: v_xor_b32_e32 v1, v24, v1
435 ; GISEL-NEXT: v_xor_b32_e32 v2, v24, v2
436 ; GISEL-NEXT: v_xor_b32_e32 v3, v24, v3
437 ; GISEL-NEXT: v_xor_b32_e32 v8, v25, v8
438 ; GISEL-NEXT: v_xor_b32_e32 v9, v25, v9
439 ; GISEL-NEXT: v_xor_b32_e32 v10, v25, v10
440 ; GISEL-NEXT: v_xor_b32_e32 v11, v25, v11
441 ; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v0, v24
442 ; GISEL-NEXT: v_subb_u32_e32 v19, vcc, v1, v24, vcc
443 ; GISEL-NEXT: v_sub_i32_e64 v26, s[4:5], v8, v25
444 ; GISEL-NEXT: v_subb_u32_e64 v27, s[4:5], v9, v25, s[4:5]
445 ; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v2, v24, vcc
446 ; GISEL-NEXT: v_subb_u32_e32 v21, vcc, v3, v24, vcc
447 ; GISEL-NEXT: v_subb_u32_e64 v10, vcc, v10, v25, s[4:5]
448 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v11, v25, vcc
449 ; GISEL-NEXT: v_ffbh_u32_e32 v8, v27
450 ; GISEL-NEXT: v_ffbh_u32_e32 v9, v26
451 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v18
452 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v19
453 ; GISEL-NEXT: v_or_b32_e32 v0, v26, v10
454 ; GISEL-NEXT: v_or_b32_e32 v1, v27, v11
455 ; GISEL-NEXT: v_or_b32_e32 v2, v18, v20
456 ; GISEL-NEXT: v_or_b32_e32 v3, v19, v21
457 ; GISEL-NEXT: v_add_i32_e32 v9, vcc, 32, v9
458 ; GISEL-NEXT: v_add_i32_e32 v22, vcc, 32, v22
459 ; GISEL-NEXT: v_ffbh_u32_e32 v28, v10
460 ; GISEL-NEXT: v_ffbh_u32_e32 v29, v11
461 ; GISEL-NEXT: v_ffbh_u32_e32 v30, v20
462 ; GISEL-NEXT: v_ffbh_u32_e32 v31, v21
463 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
464 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
465 ; GISEL-NEXT: v_min_u32_e32 v0, v8, v9
466 ; GISEL-NEXT: v_min_u32_e32 v1, v23, v22
467 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v28
468 ; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v30
469 ; GISEL-NEXT: v_min_u32_e32 v2, v29, v2
470 ; GISEL-NEXT: v_min_u32_e32 v3, v31, v3
471 ; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0
472 ; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 64, v1
473 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
474 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
475 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
476 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
477 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
478 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
479 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
480 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
481 ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
482 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
483 ; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v2
484 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[16:17]
485 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
486 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
487 ; GISEL-NEXT: v_or_b32_e32 v9, v3, v1
488 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
489 ; GISEL-NEXT: v_cndmask_b32_e64 v17, 0, 1, vcc
490 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
491 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
492 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
493 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
494 ; GISEL-NEXT: v_or_b32_e32 v9, v22, v16
495 ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
496 ; GISEL-NEXT: v_and_b32_e32 v9, 1, v9
497 ; GISEL-NEXT: v_and_b32_e32 v8, 1, v8
498 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v9
499 ; GISEL-NEXT: v_cndmask_b32_e64 v22, v18, 0, vcc
500 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8
501 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v20, 0, vcc
502 ; GISEL-NEXT: v_cndmask_b32_e64 v9, v21, 0, vcc
503 ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
504 ; GISEL-NEXT: v_cndmask_b32_e64 v23, v19, 0, vcc
505 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
506 ; GISEL-NEXT: s_cbranch_execz .LBB0_6
507 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
508 ; GISEL-NEXT: v_add_i32_e32 v28, vcc, 1, v2
509 ; GISEL-NEXT: v_addc_u32_e64 v29, s[4:5], 0, v3, vcc
510 ; GISEL-NEXT: v_sub_i32_e32 v32, vcc, 0x7f, v2
511 ; GISEL-NEXT: v_not_b32_e32 v2, 63
512 ; GISEL-NEXT: v_addc_u32_e64 v30, vcc, 0, v0, s[4:5]
513 ; GISEL-NEXT: v_addc_u32_e32 v31, vcc, 0, v1, vcc
514 ; GISEL-NEXT: v_add_i32_e64 v16, s[4:5], v32, v2
515 ; GISEL-NEXT: v_sub_i32_e64 v8, s[4:5], 64, v32
516 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[18:19], v32
517 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], v32
518 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
519 ; GISEL-NEXT: v_lshr_b64 v[8:9], v[18:19], v8
520 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[18:19], v16
521 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v32
522 ; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc
523 ; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc
524 ; GISEL-NEXT: v_or_b32_e32 v0, v8, v2
525 ; GISEL-NEXT: v_or_b32_e32 v1, v9, v3
526 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc
527 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc
528 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32
529 ; GISEL-NEXT: v_cndmask_b32_e32 v8, v0, v20, vcc
530 ; GISEL-NEXT: v_cndmask_b32_e32 v9, v1, v21, vcc
531 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
532 ; GISEL-NEXT: v_mov_b32_e32 v0, s8
533 ; GISEL-NEXT: v_mov_b32_e32 v1, s9
534 ; GISEL-NEXT: v_mov_b32_e32 v2, s10
535 ; GISEL-NEXT: v_mov_b32_e32 v3, s11
536 ; GISEL-NEXT: s_and_saveexec_b64 s[8:9], s[4:5]
537 ; GISEL-NEXT: s_xor_b64 s[12:13], exec, s[8:9]
538 ; GISEL-NEXT: s_cbranch_execz .LBB0_5
539 ; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
540 ; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v28
541 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v28
542 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v28
543 ; GISEL-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v28
544 ; GISEL-NEXT: v_lshr_b64 v[0:1], v[20:21], v28
545 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[18:19], v28
546 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[20:21], v22
547 ; GISEL-NEXT: v_or_b32_e32 v22, v2, v22
548 ; GISEL-NEXT: v_or_b32_e32 v23, v3, v23
549 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
550 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[20:21], v32
551 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v22, vcc
552 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v23, vcc
553 ; GISEL-NEXT: v_cndmask_b32_e64 v18, v2, v18, s[4:5]
554 ; GISEL-NEXT: v_cndmask_b32_e64 v19, v3, v19, s[4:5]
555 ; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc
556 ; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc
557 ; GISEL-NEXT: v_add_i32_e32 v32, vcc, -1, v26
558 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v27, vcc
559 ; GISEL-NEXT: v_addc_u32_e32 v34, vcc, -1, v10, vcc
560 ; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v11, vcc
561 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
562 ; GISEL-NEXT: v_mov_b32_e32 v23, 0
563 ; GISEL-NEXT: v_mov_b32_e32 v0, s8
564 ; GISEL-NEXT: v_mov_b32_e32 v1, s9
565 ; GISEL-NEXT: v_mov_b32_e32 v2, s10
566 ; GISEL-NEXT: v_mov_b32_e32 v3, s11
567 ; GISEL-NEXT: .LBB0_3: ; %udiv-do-while3
568 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
569 ; GISEL-NEXT: v_lshrrev_b32_e32 v36, 31, v17
570 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[16:17], 1
571 ; GISEL-NEXT: v_or_b32_e32 v16, v0, v2
572 ; GISEL-NEXT: v_or_b32_e32 v17, v1, v3
573 ; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v19
574 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[18:19], 1
575 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1
576 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v22
577 ; GISEL-NEXT: v_lshrrev_b32_e32 v18, 31, v9
578 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v18
579 ; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v32, v0
580 ; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v33, v1, vcc
581 ; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v34, v2, vcc
582 ; GISEL-NEXT: v_subb_u32_e32 v18, vcc, v35, v3, vcc
583 ; GISEL-NEXT: v_ashrrev_i32_e32 v22, 31, v18
584 ; GISEL-NEXT: v_and_b32_e32 v18, v22, v26
585 ; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v0, v18
586 ; GISEL-NEXT: v_and_b32_e32 v0, v22, v27
587 ; GISEL-NEXT: v_subb_u32_e32 v19, vcc, v1, v0, vcc
588 ; GISEL-NEXT: v_and_b32_e32 v0, v22, v10
589 ; GISEL-NEXT: v_subb_u32_e32 v20, vcc, v2, v0, vcc
590 ; GISEL-NEXT: v_and_b32_e32 v0, v22, v11
591 ; GISEL-NEXT: v_subb_u32_e32 v21, vcc, v3, v0, vcc
592 ; GISEL-NEXT: v_add_i32_e32 v28, vcc, -1, v28
593 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
594 ; GISEL-NEXT: v_addc_u32_e32 v30, vcc, -1, v30, vcc
595 ; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
596 ; GISEL-NEXT: v_or_b32_e32 v0, v28, v30
597 ; GISEL-NEXT: v_or_b32_e32 v1, v29, v31
598 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
599 ; GISEL-NEXT: v_and_b32_e32 v22, 1, v22
600 ; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
601 ; GISEL-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
602 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v36
603 ; GISEL-NEXT: v_mov_b32_e32 v0, v22
604 ; GISEL-NEXT: v_mov_b32_e32 v1, v23
605 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[8:9]
606 ; GISEL-NEXT: s_cbranch_execnz .LBB0_3
607 ; GISEL-NEXT: ; %bb.4: ; %Flow13
608 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
609 ; GISEL-NEXT: .LBB0_5: ; %Flow14
610 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
611 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[16:17], 1
612 ; GISEL-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
613 ; GISEL-NEXT: v_lshrrev_b32_e32 v10, 31, v17
614 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v10
615 ; GISEL-NEXT: v_or_b32_e32 v22, v0, v2
616 ; GISEL-NEXT: v_or_b32_e32 v23, v1, v3
617 ; GISEL-NEXT: .LBB0_6: ; %Flow16
618 ; GISEL-NEXT: s_or_b64 exec, exec, s[6:7]
619 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
620 ; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v7
621 ; GISEL-NEXT: v_ashrrev_i32_e32 v19, 31, v15
622 ; GISEL-NEXT: v_mov_b32_e32 v10, 0x7f
623 ; GISEL-NEXT: v_mov_b32_e32 v11, 0
624 ; GISEL-NEXT: v_xor_b32_e32 v0, v18, v4
625 ; GISEL-NEXT: v_xor_b32_e32 v1, v18, v5
626 ; GISEL-NEXT: v_xor_b32_e32 v2, v18, v6
627 ; GISEL-NEXT: v_xor_b32_e32 v3, v18, v7
628 ; GISEL-NEXT: v_xor_b32_e32 v4, v19, v12
629 ; GISEL-NEXT: v_xor_b32_e32 v5, v19, v13
630 ; GISEL-NEXT: v_xor_b32_e32 v14, v19, v14
631 ; GISEL-NEXT: v_xor_b32_e32 v15, v19, v15
632 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v0, v18
633 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v1, v18, vcc
634 ; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], v4, v19
635 ; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], v5, v19, s[4:5]
636 ; GISEL-NEXT: v_subb_u32_e32 v12, vcc, v2, v18, vcc
637 ; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v18, vcc
638 ; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v14, v19, s[4:5]
639 ; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v15, v19, vcc
640 ; GISEL-NEXT: v_ffbh_u32_e32 v14, v21
641 ; GISEL-NEXT: v_ffbh_u32_e32 v15, v20
642 ; GISEL-NEXT: v_ffbh_u32_e32 v16, v7
643 ; GISEL-NEXT: v_ffbh_u32_e32 v17, v6
644 ; GISEL-NEXT: v_or_b32_e32 v0, v20, v4
645 ; GISEL-NEXT: v_or_b32_e32 v1, v21, v5
646 ; GISEL-NEXT: v_or_b32_e32 v2, v6, v12
647 ; GISEL-NEXT: v_or_b32_e32 v3, v7, v13
648 ; GISEL-NEXT: v_add_i32_e32 v15, vcc, 32, v15
649 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v5
650 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v4
651 ; GISEL-NEXT: v_add_i32_e32 v17, vcc, 32, v17
652 ; GISEL-NEXT: v_ffbh_u32_e32 v28, v13
653 ; GISEL-NEXT: v_ffbh_u32_e32 v29, v12
654 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
655 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
656 ; GISEL-NEXT: v_min_u32_e32 v0, v14, v15
657 ; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v27
658 ; GISEL-NEXT: v_min_u32_e32 v2, v16, v17
659 ; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v29
660 ; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0
661 ; GISEL-NEXT: v_min_u32_e32 v1, v26, v1
662 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 64, v2
663 ; GISEL-NEXT: v_min_u32_e32 v3, v28, v3
664 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
665 ; GISEL-NEXT: v_cndmask_b32_e64 v14, 0, 1, s[4:5]
666 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
667 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
668 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
669 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc
670 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
671 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
672 ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
673 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
674 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[10:11]
675 ; GISEL-NEXT: v_cndmask_b32_e64 v15, 0, 1, vcc
676 ; GISEL-NEXT: v_xor_b32_e32 v10, 0x7f, v2
677 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
678 ; GISEL-NEXT: v_cndmask_b32_e64 v16, 0, 1, vcc
679 ; GISEL-NEXT: v_or_b32_e32 v10, v10, v0
680 ; GISEL-NEXT: v_or_b32_e32 v11, v3, v1
681 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
682 ; GISEL-NEXT: v_cndmask_b32_e32 v15, v16, v15, vcc
683 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
684 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
685 ; GISEL-NEXT: v_or_b32_e32 v11, v14, v15
686 ; GISEL-NEXT: v_and_b32_e32 v14, 1, v11
687 ; GISEL-NEXT: v_or_b32_e32 v10, v11, v10
688 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v14
689 ; GISEL-NEXT: v_cndmask_b32_e64 v14, v6, 0, vcc
690 ; GISEL-NEXT: v_and_b32_e32 v16, 1, v10
691 ; GISEL-NEXT: v_cndmask_b32_e64 v15, v7, 0, vcc
692 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v12, 0, vcc
693 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v13, 0, vcc
694 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v16
695 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
696 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
697 ; GISEL-NEXT: s_cbranch_execz .LBB0_12
698 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
699 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v2
700 ; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v3, vcc
701 ; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v2
702 ; GISEL-NEXT: v_not_b32_e32 v2, 63
703 ; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v0, s[4:5]
704 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v1, vcc
705 ; GISEL-NEXT: v_add_i32_e64 v14, s[4:5], v30, v2
706 ; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v30
707 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[6:7], v30
708 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], v30
709 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
710 ; GISEL-NEXT: v_lshr_b64 v[10:11], v[6:7], v10
711 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v14
712 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
713 ; GISEL-NEXT: v_cndmask_b32_e32 v14, 0, v0, vcc
714 ; GISEL-NEXT: v_cndmask_b32_e32 v15, 0, v1, vcc
715 ; GISEL-NEXT: v_or_b32_e32 v0, v10, v2
716 ; GISEL-NEXT: v_or_b32_e32 v1, v11, v3
717 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v16, v0, vcc
718 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v17, v1, vcc
719 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
720 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v0, v12, vcc
721 ; GISEL-NEXT: v_cndmask_b32_e32 v11, v1, v13, vcc
722 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
723 ; GISEL-NEXT: v_mov_b32_e32 v0, s8
724 ; GISEL-NEXT: v_mov_b32_e32 v1, s9
725 ; GISEL-NEXT: v_mov_b32_e32 v2, s10
726 ; GISEL-NEXT: v_mov_b32_e32 v3, s11
727 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
728 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
729 ; GISEL-NEXT: s_cbranch_execz .LBB0_11
730 ; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
731 ; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
732 ; GISEL-NEXT: v_sub_i32_e32 v16, vcc, 64, v26
733 ; GISEL-NEXT: v_lshr_b64 v[0:1], v[12:13], v26
734 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[6:7], v26
735 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
736 ; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v20
737 ; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v21, vcc
738 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[12:13], v16
739 ; GISEL-NEXT: v_lshr_b64 v[12:13], v[12:13], v32
740 ; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v4, vcc
741 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v5, vcc
742 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
743 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v16
744 ; GISEL-NEXT: v_or_b32_e32 v3, v3, v17
745 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
746 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v12, v2, vcc
747 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v13, v3, vcc
748 ; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v0, vcc
749 ; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v1, vcc
750 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26
751 ; GISEL-NEXT: v_cndmask_b32_e32 v12, v2, v6, vcc
752 ; GISEL-NEXT: v_cndmask_b32_e32 v13, v3, v7, vcc
753 ; GISEL-NEXT: v_mov_b32_e32 v7, 0
754 ; GISEL-NEXT: v_mov_b32_e32 v0, s4
755 ; GISEL-NEXT: v_mov_b32_e32 v1, s5
756 ; GISEL-NEXT: v_mov_b32_e32 v2, s6
757 ; GISEL-NEXT: v_mov_b32_e32 v3, s7
758 ; GISEL-NEXT: .LBB0_9: ; %udiv-do-while
759 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
760 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[12:13], 1
761 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
762 ; GISEL-NEXT: v_lshrrev_b32_e32 v6, 31, v13
763 ; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v11
764 ; GISEL-NEXT: v_lshl_b64 v[12:13], v[14:15], 1
765 ; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
766 ; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v15
767 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v26
768 ; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc
769 ; GISEL-NEXT: v_or_b32_e32 v16, v16, v6
770 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v34
771 ; GISEL-NEXT: v_or_b32_e32 v10, v10, v14
772 ; GISEL-NEXT: v_or_b32_e32 v14, v0, v12
773 ; GISEL-NEXT: v_or_b32_e32 v15, v1, v13
774 ; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc
775 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
776 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v2
777 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v3, vcc
778 ; GISEL-NEXT: v_or_b32_e32 v0, v26, v28
779 ; GISEL-NEXT: v_or_b32_e32 v1, v27, v29
780 ; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v32, v16, vcc
781 ; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v33, v17, vcc
782 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
783 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v6
784 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
785 ; GISEL-NEXT: v_and_b32_e32 v6, 1, v0
786 ; GISEL-NEXT: v_and_b32_e32 v12, v0, v20
787 ; GISEL-NEXT: v_and_b32_e32 v13, v0, v21
788 ; GISEL-NEXT: v_and_b32_e32 v34, v0, v4
789 ; GISEL-NEXT: v_and_b32_e32 v35, v0, v5
790 ; GISEL-NEXT: v_mov_b32_e32 v0, v6
791 ; GISEL-NEXT: v_mov_b32_e32 v1, v7
792 ; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v2, v12
793 ; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v3, v13, vcc
794 ; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v34, vcc
795 ; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc
796 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
797 ; GISEL-NEXT: s_cbranch_execnz .LBB0_9
798 ; GISEL-NEXT: ; %bb.10: ; %Flow
799 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
800 ; GISEL-NEXT: .LBB0_11: ; %Flow11
801 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
802 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1
803 ; GISEL-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
804 ; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v15
805 ; GISEL-NEXT: v_or_b32_e32 v10, v10, v4
806 ; GISEL-NEXT: v_or_b32_e32 v14, v0, v2
807 ; GISEL-NEXT: v_or_b32_e32 v15, v1, v3
808 ; GISEL-NEXT: .LBB0_12: ; %Flow12
809 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
810 ; GISEL-NEXT: v_xor_b32_e32 v3, v25, v24
811 ; GISEL-NEXT: v_xor_b32_e32 v7, v19, v18
812 ; GISEL-NEXT: v_xor_b32_e32 v0, v22, v3
813 ; GISEL-NEXT: v_xor_b32_e32 v1, v23, v3
814 ; GISEL-NEXT: v_xor_b32_e32 v2, v8, v3
815 ; GISEL-NEXT: v_xor_b32_e32 v6, v9, v3
816 ; GISEL-NEXT: v_xor_b32_e32 v4, v14, v7
817 ; GISEL-NEXT: v_xor_b32_e32 v5, v15, v7
818 ; GISEL-NEXT: v_xor_b32_e32 v8, v10, v7
819 ; GISEL-NEXT: v_xor_b32_e32 v9, v11, v7
820 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v3
821 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
822 ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v7
823 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v7, s[4:5]
824 ; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v3, vcc
825 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v6, v3, vcc
826 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v8, v7, s[4:5]
827 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v9, v7, vcc
828 ; GISEL-NEXT: s_setpc_b64 s[30:31]
829 %shl = sdiv <2 x i128> %lhs, %rhs
833 define <2 x i128> @v_udiv_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
834 ; SDAG-LABEL: v_udiv_v2i128_vv:
835 ; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
836 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837 ; SDAG-NEXT: v_or_b32_e32 v17, v9, v11
838 ; SDAG-NEXT: v_or_b32_e32 v16, v8, v10
839 ; SDAG-NEXT: v_or_b32_e32 v19, v1, v3
840 ; SDAG-NEXT: v_or_b32_e32 v18, v0, v2
841 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v10
842 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v11
843 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v8
844 ; SDAG-NEXT: v_ffbh_u32_e32 v23, v9
845 ; SDAG-NEXT: v_ffbh_u32_e32 v24, v2
846 ; SDAG-NEXT: v_ffbh_u32_e32 v25, v3
847 ; SDAG-NEXT: v_ffbh_u32_e32 v26, v0
848 ; SDAG-NEXT: v_ffbh_u32_e32 v27, v1
849 ; SDAG-NEXT: v_mov_b32_e32 v28, 0
850 ; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
851 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
852 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
853 ; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20
854 ; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22
855 ; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24
856 ; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26
857 ; SDAG-NEXT: v_min_u32_e32 v16, v16, v21
858 ; SDAG-NEXT: v_min_u32_e32 v17, v17, v23
859 ; SDAG-NEXT: v_min_u32_e32 v18, v18, v25
860 ; SDAG-NEXT: v_min_u32_e32 v19, v19, v27
861 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
862 ; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17
863 ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
864 ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19
865 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
866 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
867 ; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc
868 ; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
869 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
870 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc
871 ; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
872 ; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v16, v18
873 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v20, v17, vcc
874 ; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v22
875 ; SDAG-NEXT: v_subbrev_u32_e32 v24, vcc, 0, v28, vcc
876 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[22:23]
877 ; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, s[4:5]
878 ; SDAG-NEXT: v_subbrev_u32_e32 v25, vcc, 0, v28, vcc
879 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v24
880 ; SDAG-NEXT: v_or_b32_e32 v17, v23, v25
881 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[24:25]
882 ; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
883 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
884 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[24:25]
885 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v19, v18, s[4:5]
886 ; SDAG-NEXT: v_and_b32_e32 v16, 1, v16
887 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16
888 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
889 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v3, 0, s[4:5]
890 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
891 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v2, 0, s[4:5]
892 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v1, 0, s[4:5]
893 ; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc
894 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v0, 0, s[4:5]
895 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
896 ; SDAG-NEXT: s_cbranch_execz .LBB1_6
897 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
898 ; SDAG-NEXT: v_add_i32_e32 v26, vcc, 1, v22
899 ; SDAG-NEXT: v_sub_i32_e64 v16, s[4:5], 63, v22
900 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
901 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
902 ; SDAG-NEXT: v_addc_u32_e32 v27, vcc, 0, v23, vcc
903 ; SDAG-NEXT: v_lshl_b64 v[16:17], v[0:1], v16
904 ; SDAG-NEXT: v_addc_u32_e32 v28, vcc, 0, v24, vcc
905 ; SDAG-NEXT: v_addc_u32_e32 v29, vcc, 0, v25, vcc
906 ; SDAG-NEXT: v_or_b32_e32 v18, v26, v28
907 ; SDAG-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22
908 ; SDAG-NEXT: v_or_b32_e32 v19, v27, v29
909 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v30
910 ; SDAG-NEXT: v_sub_i32_e32 v31, vcc, 64, v30
911 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[0:1], v30
912 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
913 ; SDAG-NEXT: v_lshr_b64 v[18:19], v[0:1], v31
914 ; SDAG-NEXT: v_or_b32_e32 v19, v23, v19
915 ; SDAG-NEXT: v_or_b32_e32 v18, v22, v18
916 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
917 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v17, v19, s[4:5]
918 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v16, v18, s[4:5]
919 ; SDAG-NEXT: v_cndmask_b32_e64 v17, 0, v25, s[4:5]
920 ; SDAG-NEXT: v_cndmask_b32_e64 v16, 0, v24, s[4:5]
921 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v30
922 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v3, s[4:5]
923 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v2, s[4:5]
924 ; SDAG-NEXT: v_mov_b32_e32 v22, 0
925 ; SDAG-NEXT: v_mov_b32_e32 v23, 0
926 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
927 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
928 ; SDAG-NEXT: s_cbranch_execz .LBB1_5
929 ; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
930 ; SDAG-NEXT: v_lshr_b64 v[20:21], v[0:1], v26
931 ; SDAG-NEXT: v_sub_i32_e32 v22, vcc, 64, v26
932 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[2:3], v22
933 ; SDAG-NEXT: v_or_b32_e32 v23, v21, v23
934 ; SDAG-NEXT: v_or_b32_e32 v22, v20, v22
935 ; SDAG-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
936 ; SDAG-NEXT: v_subrev_i32_e64 v20, s[4:5], 64, v26
937 ; SDAG-NEXT: v_lshr_b64 v[20:21], v[2:3], v20
938 ; SDAG-NEXT: v_cndmask_b32_e32 v21, v21, v23, vcc
939 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
940 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v21, v1, s[4:5]
941 ; SDAG-NEXT: v_cndmask_b32_e32 v20, v20, v22, vcc
942 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v20, v0, s[4:5]
943 ; SDAG-NEXT: v_lshr_b64 v[2:3], v[2:3], v26
944 ; SDAG-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc
945 ; SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc
946 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v8
947 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v9, vcc
948 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v10, vcc
949 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v11, vcc
950 ; SDAG-NEXT: s_mov_b64 s[4:5], 0
951 ; SDAG-NEXT: v_mov_b32_e32 v24, 0
952 ; SDAG-NEXT: v_mov_b32_e32 v25, 0
953 ; SDAG-NEXT: v_mov_b32_e32 v22, 0
954 ; SDAG-NEXT: v_mov_b32_e32 v23, 0
955 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
956 ; SDAG-NEXT: .LBB1_3: ; %udiv-do-while3
957 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
958 ; SDAG-NEXT: v_lshrrev_b32_e32 v34, 31, v17
959 ; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
960 ; SDAG-NEXT: v_or_b32_e32 v17, v25, v17
961 ; SDAG-NEXT: v_or_b32_e32 v16, v24, v16
962 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
963 ; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v1
964 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
965 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v20
966 ; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v19
967 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v20
968 ; SDAG-NEXT: v_sub_i32_e32 v20, vcc, v30, v0
969 ; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v31, v1, vcc
970 ; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v32, v2, vcc
971 ; SDAG-NEXT: v_subb_u32_e32 v20, vcc, v33, v3, vcc
972 ; SDAG-NEXT: v_ashrrev_i32_e32 v20, 31, v20
973 ; SDAG-NEXT: v_and_b32_e32 v24, v20, v8
974 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v24
975 ; SDAG-NEXT: v_and_b32_e32 v24, v20, v9
976 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v24, vcc
977 ; SDAG-NEXT: v_and_b32_e32 v24, v20, v10
978 ; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v24, vcc
979 ; SDAG-NEXT: v_and_b32_e32 v24, v20, v11
980 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v24, vcc
981 ; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v26
982 ; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc
983 ; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc
984 ; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
985 ; SDAG-NEXT: v_or_b32_e32 v24, v26, v28
986 ; SDAG-NEXT: v_or_b32_e32 v25, v27, v29
987 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[24:25]
988 ; SDAG-NEXT: v_and_b32_e32 v20, 1, v20
989 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
990 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v34
991 ; SDAG-NEXT: v_or_b32_e32 v19, v23, v19
992 ; SDAG-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
993 ; SDAG-NEXT: v_or_b32_e32 v18, v22, v18
994 ; SDAG-NEXT: v_mov_b32_e32 v25, v21
995 ; SDAG-NEXT: v_mov_b32_e32 v24, v20
996 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[4:5]
997 ; SDAG-NEXT: s_cbranch_execnz .LBB1_3
998 ; SDAG-NEXT: ; %bb.4: ; %Flow13
999 ; SDAG-NEXT: s_or_b64 exec, exec, s[4:5]
1000 ; SDAG-NEXT: .LBB1_5: ; %Flow14
1001 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
1002 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[18:19], 1
1003 ; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v17
1004 ; SDAG-NEXT: v_lshl_b64 v[2:3], v[16:17], 1
1005 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v8
1006 ; SDAG-NEXT: v_or_b32_e32 v16, v23, v1
1007 ; SDAG-NEXT: v_or_b32_e32 v18, v21, v3
1008 ; SDAG-NEXT: v_or_b32_e32 v17, v22, v0
1009 ; SDAG-NEXT: v_or_b32_e32 v19, v20, v2
1010 ; SDAG-NEXT: .LBB1_6: ; %Flow16
1011 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
1012 ; SDAG-NEXT: v_or_b32_e32 v1, v13, v15
1013 ; SDAG-NEXT: v_or_b32_e32 v0, v12, v14
1014 ; SDAG-NEXT: v_or_b32_e32 v3, v5, v7
1015 ; SDAG-NEXT: v_or_b32_e32 v2, v4, v6
1016 ; SDAG-NEXT: v_ffbh_u32_e32 v8, v14
1017 ; SDAG-NEXT: v_ffbh_u32_e32 v9, v15
1018 ; SDAG-NEXT: v_ffbh_u32_e32 v10, v12
1019 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v13
1020 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v6
1021 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v7
1022 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v4
1023 ; SDAG-NEXT: v_ffbh_u32_e32 v23, v5
1024 ; SDAG-NEXT: v_mov_b32_e32 v24, 0
1025 ; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
1026 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1027 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
1028 ; SDAG-NEXT: v_add_i32_e64 v0, s[6:7], 32, v8
1029 ; SDAG-NEXT: v_add_i32_e64 v1, s[6:7], 32, v10
1030 ; SDAG-NEXT: v_add_i32_e64 v2, s[6:7], 32, v20
1031 ; SDAG-NEXT: v_add_i32_e64 v3, s[6:7], 32, v22
1032 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
1033 ; SDAG-NEXT: v_min_u32_e32 v0, v0, v9
1034 ; SDAG-NEXT: v_min_u32_e32 v1, v1, v11
1035 ; SDAG-NEXT: v_min_u32_e32 v2, v2, v21
1036 ; SDAG-NEXT: v_min_u32_e32 v3, v3, v23
1037 ; SDAG-NEXT: v_add_i32_e32 v1, vcc, 64, v1
1038 ; SDAG-NEXT: v_addc_u32_e64 v8, s[4:5], 0, 0, vcc
1039 ; SDAG-NEXT: v_add_i32_e32 v3, vcc, 64, v3
1040 ; SDAG-NEXT: v_addc_u32_e64 v9, s[4:5], 0, 0, vcc
1041 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
1042 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v8, 0, vcc
1043 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1044 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
1045 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, 0, vcc
1046 ; SDAG-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1047 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
1048 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v8, v1, vcc
1049 ; SDAG-NEXT: v_xor_b32_e32 v2, 0x7f, v0
1050 ; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v24, vcc
1051 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[0:1]
1052 ; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[4:5]
1053 ; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v24, vcc
1054 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v20
1055 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
1056 ; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, 1, vcc
1057 ; SDAG-NEXT: v_or_b32_e32 v3, v1, v21
1058 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
1059 ; SDAG-NEXT: v_cndmask_b32_e32 v8, v9, v8, vcc
1060 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
1061 ; SDAG-NEXT: v_and_b32_e32 v2, 1, v8
1062 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v2
1063 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
1064 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v7, 0, s[4:5]
1065 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
1066 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v6, 0, s[4:5]
1067 ; SDAG-NEXT: v_cndmask_b32_e64 v10, v5, 0, s[4:5]
1068 ; SDAG-NEXT: v_cndmask_b32_e64 v11, v4, 0, s[4:5]
1069 ; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
1070 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
1071 ; SDAG-NEXT: s_cbranch_execz .LBB1_12
1072 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
1073 ; SDAG-NEXT: v_add_i32_e32 v22, vcc, 1, v0
1074 ; SDAG-NEXT: v_sub_i32_e64 v8, s[4:5], 63, v0
1075 ; SDAG-NEXT: v_mov_b32_e32 v2, 0
1076 ; SDAG-NEXT: v_mov_b32_e32 v3, 0
1077 ; SDAG-NEXT: v_addc_u32_e32 v23, vcc, 0, v1, vcc
1078 ; SDAG-NEXT: v_lshl_b64 v[8:9], v[4:5], v8
1079 ; SDAG-NEXT: v_addc_u32_e32 v24, vcc, 0, v20, vcc
1080 ; SDAG-NEXT: v_addc_u32_e32 v25, vcc, 0, v21, vcc
1081 ; SDAG-NEXT: v_or_b32_e32 v10, v22, v24
1082 ; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v0
1083 ; SDAG-NEXT: v_or_b32_e32 v11, v23, v25
1084 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[6:7], v26
1085 ; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26
1086 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v26
1087 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
1088 ; SDAG-NEXT: v_lshr_b64 v[10:11], v[4:5], v27
1089 ; SDAG-NEXT: v_or_b32_e32 v1, v1, v11
1090 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v10
1091 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26
1092 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v9, v1, s[4:5]
1093 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5]
1094 ; SDAG-NEXT: v_cndmask_b32_e64 v9, 0, v21, s[4:5]
1095 ; SDAG-NEXT: v_cndmask_b32_e64 v8, 0, v20, s[4:5]
1096 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
1097 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[4:5]
1098 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5]
1099 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
1100 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
1101 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
1102 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
1103 ; SDAG-NEXT: s_cbranch_execz .LBB1_11
1104 ; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
1105 ; SDAG-NEXT: v_lshr_b64 v[2:3], v[4:5], v22
1106 ; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v22
1107 ; SDAG-NEXT: v_subrev_i32_e32 v28, vcc, 64, v22
1108 ; SDAG-NEXT: v_lshr_b64 v[29:30], v[6:7], v22
1109 ; SDAG-NEXT: v_add_i32_e32 v26, vcc, -1, v12
1110 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
1111 ; SDAG-NEXT: v_mov_b32_e32 v10, 0
1112 ; SDAG-NEXT: v_mov_b32_e32 v11, 0
1113 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
1114 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
1115 ; SDAG-NEXT: v_lshl_b64 v[31:32], v[6:7], v27
1116 ; SDAG-NEXT: v_lshr_b64 v[6:7], v[6:7], v28
1117 ; SDAG-NEXT: v_addc_u32_e32 v27, vcc, -1, v13, vcc
1118 ; SDAG-NEXT: v_or_b32_e32 v3, v3, v32
1119 ; SDAG-NEXT: v_or_b32_e32 v2, v2, v31
1120 ; SDAG-NEXT: v_addc_u32_e32 v28, vcc, -1, v14, vcc
1121 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v22
1122 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[4:5]
1123 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5]
1124 ; SDAG-NEXT: v_cndmask_b32_e64 v7, 0, v30, s[4:5]
1125 ; SDAG-NEXT: v_cndmask_b32_e64 v6, 0, v29, s[4:5]
1126 ; SDAG-NEXT: v_addc_u32_e32 v29, vcc, -1, v15, vcc
1127 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v22
1128 ; SDAG-NEXT: v_cndmask_b32_e32 v5, v3, v5, vcc
1129 ; SDAG-NEXT: v_cndmask_b32_e32 v4, v2, v4, vcc
1130 ; SDAG-NEXT: v_mov_b32_e32 v3, 0
1131 ; SDAG-NEXT: .LBB1_9: ; %udiv-do-while
1132 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1133 ; SDAG-NEXT: v_lshl_b64 v[6:7], v[6:7], 1
1134 ; SDAG-NEXT: v_lshrrev_b32_e32 v2, 31, v5
1135 ; SDAG-NEXT: v_lshl_b64 v[4:5], v[4:5], 1
1136 ; SDAG-NEXT: v_lshrrev_b32_e32 v30, 31, v1
1137 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
1138 ; SDAG-NEXT: v_lshrrev_b32_e32 v31, 31, v9
1139 ; SDAG-NEXT: v_lshl_b64 v[8:9], v[8:9], 1
1140 ; SDAG-NEXT: v_or_b32_e32 v6, v6, v2
1141 ; SDAG-NEXT: v_or_b32_e32 v2, v4, v30
1142 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v31
1143 ; SDAG-NEXT: v_or_b32_e32 v1, v21, v1
1144 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v26, v2
1145 ; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v27, v5, vcc
1146 ; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v28, v6, vcc
1147 ; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v29, v7, vcc
1148 ; SDAG-NEXT: v_ashrrev_i32_e32 v30, 31, v4
1149 ; SDAG-NEXT: v_and_b32_e32 v31, v30, v13
1150 ; SDAG-NEXT: v_and_b32_e32 v4, v30, v12
1151 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v2, v4
1152 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v31, vcc
1153 ; SDAG-NEXT: v_or_b32_e32 v9, v11, v9
1154 ; SDAG-NEXT: v_or_b32_e32 v0, v20, v0
1155 ; SDAG-NEXT: v_and_b32_e32 v2, 1, v30
1156 ; SDAG-NEXT: v_and_b32_e32 v11, v30, v15
1157 ; SDAG-NEXT: v_and_b32_e32 v30, v30, v14
1158 ; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v30, vcc
1159 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc
1160 ; SDAG-NEXT: v_add_i32_e32 v22, vcc, -1, v22
1161 ; SDAG-NEXT: v_addc_u32_e32 v23, vcc, -1, v23, vcc
1162 ; SDAG-NEXT: v_addc_u32_e32 v24, vcc, -1, v24, vcc
1163 ; SDAG-NEXT: v_addc_u32_e32 v25, vcc, -1, v25, vcc
1164 ; SDAG-NEXT: v_or_b32_e32 v31, v23, v25
1165 ; SDAG-NEXT: v_or_b32_e32 v30, v22, v24
1166 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[30:31]
1167 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
1168 ; SDAG-NEXT: v_or_b32_e32 v8, v10, v8
1169 ; SDAG-NEXT: v_mov_b32_e32 v11, v3
1170 ; SDAG-NEXT: v_mov_b32_e32 v10, v2
1171 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
1172 ; SDAG-NEXT: s_cbranch_execnz .LBB1_9
1173 ; SDAG-NEXT: ; %bb.10: ; %Flow
1174 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
1175 ; SDAG-NEXT: .LBB1_11: ; %Flow11
1176 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
1177 ; SDAG-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
1178 ; SDAG-NEXT: v_lshrrev_b32_e32 v6, 31, v9
1179 ; SDAG-NEXT: v_lshl_b64 v[4:5], v[8:9], 1
1180 ; SDAG-NEXT: v_or_b32_e32 v0, v0, v6
1181 ; SDAG-NEXT: v_or_b32_e32 v8, v21, v1
1182 ; SDAG-NEXT: v_or_b32_e32 v10, v3, v5
1183 ; SDAG-NEXT: v_or_b32_e32 v9, v20, v0
1184 ; SDAG-NEXT: v_or_b32_e32 v11, v2, v4
1185 ; SDAG-NEXT: .LBB1_12: ; %Flow12
1186 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
1187 ; SDAG-NEXT: v_mov_b32_e32 v0, v19
1188 ; SDAG-NEXT: v_mov_b32_e32 v1, v18
1189 ; SDAG-NEXT: v_mov_b32_e32 v2, v17
1190 ; SDAG-NEXT: v_mov_b32_e32 v3, v16
1191 ; SDAG-NEXT: v_mov_b32_e32 v4, v11
1192 ; SDAG-NEXT: v_mov_b32_e32 v5, v10
1193 ; SDAG-NEXT: v_mov_b32_e32 v6, v9
1194 ; SDAG-NEXT: v_mov_b32_e32 v7, v8
1195 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1197 ; GISEL-LABEL: v_udiv_v2i128_vv:
1198 ; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
1199 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1200 ; GISEL-NEXT: v_mov_b32_e32 v16, v2
1201 ; GISEL-NEXT: v_mov_b32_e32 v17, v3
1202 ; GISEL-NEXT: v_or_b32_e32 v2, v8, v10
1203 ; GISEL-NEXT: v_or_b32_e32 v3, v9, v11
1204 ; GISEL-NEXT: v_or_b32_e32 v18, v0, v16
1205 ; GISEL-NEXT: v_or_b32_e32 v19, v1, v17
1206 ; GISEL-NEXT: v_ffbh_u32_e32 v20, v9
1207 ; GISEL-NEXT: v_ffbh_u32_e32 v21, v8
1208 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v11
1209 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v10
1210 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v1
1211 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v0
1212 ; GISEL-NEXT: v_ffbh_u32_e32 v28, v16
1213 ; GISEL-NEXT: v_ffbh_u32_e32 v29, v17
1214 ; GISEL-NEXT: v_mov_b32_e32 v24, 0x7f
1215 ; GISEL-NEXT: v_mov_b32_e32 v25, 0
1216 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
1217 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
1218 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
1219 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v21
1220 ; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v23
1221 ; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v27
1222 ; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v28
1223 ; GISEL-NEXT: v_min_u32_e32 v2, v20, v2
1224 ; GISEL-NEXT: v_min_u32_e32 v3, v22, v3
1225 ; GISEL-NEXT: v_min_u32_e32 v18, v26, v18
1226 ; GISEL-NEXT: v_min_u32_e32 v19, v29, v19
1227 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1228 ; GISEL-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5]
1229 ; GISEL-NEXT: v_add_i32_e32 v2, vcc, 64, v2
1230 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18
1231 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
1232 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
1233 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
1234 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v19, v18, vcc
1235 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v2, v3
1236 ; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
1237 ; GISEL-NEXT: v_subb_u32_e64 v20, s[4:5], 0, 0, s[4:5]
1238 ; GISEL-NEXT: v_subb_u32_e64 v21, s[4:5], 0, 0, s[4:5]
1239 ; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v22
1240 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
1241 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1242 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v20
1243 ; GISEL-NEXT: v_or_b32_e32 v3, v23, v21
1244 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[20:21]
1245 ; GISEL-NEXT: v_cndmask_b32_e64 v19, 0, 1, vcc
1246 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
1247 ; GISEL-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
1248 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
1249 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
1250 ; GISEL-NEXT: v_or_b32_e32 v3, v26, v18
1251 ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2
1252 ; GISEL-NEXT: v_and_b32_e32 v3, 1, v3
1253 ; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
1254 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v3
1255 ; GISEL-NEXT: v_cndmask_b32_e64 v18, v0, 0, vcc
1256 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v2
1257 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v16, 0, vcc
1258 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v17, 0, vcc
1259 ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
1260 ; GISEL-NEXT: v_cndmask_b32_e64 v19, v1, 0, vcc
1261 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
1262 ; GISEL-NEXT: s_cbranch_execz .LBB1_6
1263 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
1264 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, 1, v22
1265 ; GISEL-NEXT: v_addc_u32_e64 v27, s[4:5], 0, v23, vcc
1266 ; GISEL-NEXT: v_sub_i32_e32 v30, vcc, 0x7f, v22
1267 ; GISEL-NEXT: v_not_b32_e32 v2, 63
1268 ; GISEL-NEXT: v_addc_u32_e64 v28, vcc, 0, v20, s[4:5]
1269 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, 0, v21, vcc
1270 ; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v30, v2
1271 ; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v30
1272 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[0:1], v30
1273 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[16:17], v30
1274 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
1275 ; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v20
1276 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[0:1], v22
1277 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
1278 ; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v2, vcc
1279 ; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v3, vcc
1280 ; GISEL-NEXT: v_or_b32_e32 v2, v20, v18
1281 ; GISEL-NEXT: v_or_b32_e32 v3, v21, v19
1282 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc
1283 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc
1284 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
1285 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v16, vcc
1286 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v17, vcc
1287 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
1288 ; GISEL-NEXT: v_mov_b32_e32 v21, s11
1289 ; GISEL-NEXT: v_mov_b32_e32 v20, s10
1290 ; GISEL-NEXT: v_mov_b32_e32 v19, s9
1291 ; GISEL-NEXT: v_mov_b32_e32 v18, s8
1292 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
1293 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
1294 ; GISEL-NEXT: s_cbranch_execz .LBB1_5
1295 ; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
1296 ; GISEL-NEXT: v_add_i32_e32 v32, vcc, 0xffffffc0, v26
1297 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v26
1298 ; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v26
1299 ; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v26
1300 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
1301 ; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v8
1302 ; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v9, vcc
1303 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[16:17], v24
1304 ; GISEL-NEXT: v_lshr_b64 v[16:17], v[16:17], v32
1305 ; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v10, vcc
1306 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v11, vcc
1307 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
1308 ; GISEL-NEXT: v_or_b32_e32 v20, v20, v24
1309 ; GISEL-NEXT: v_or_b32_e32 v21, v21, v25
1310 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
1311 ; GISEL-NEXT: v_cndmask_b32_e32 v20, v16, v20, vcc
1312 ; GISEL-NEXT: v_cndmask_b32_e32 v21, v17, v21, vcc
1313 ; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v18, vcc
1314 ; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v19, vcc
1315 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26
1316 ; GISEL-NEXT: v_cndmask_b32_e32 v24, v20, v0, vcc
1317 ; GISEL-NEXT: v_cndmask_b32_e32 v25, v21, v1, vcc
1318 ; GISEL-NEXT: v_mov_b32_e32 v1, 0
1319 ; GISEL-NEXT: v_mov_b32_e32 v21, s7
1320 ; GISEL-NEXT: v_mov_b32_e32 v20, s6
1321 ; GISEL-NEXT: v_mov_b32_e32 v19, s5
1322 ; GISEL-NEXT: v_mov_b32_e32 v18, s4
1323 ; GISEL-NEXT: .LBB1_3: ; %udiv-do-while3
1324 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
1325 ; GISEL-NEXT: v_lshrrev_b32_e32 v34, 31, v23
1326 ; GISEL-NEXT: v_lshl_b64 v[20:21], v[22:23], 1
1327 ; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v25
1328 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
1329 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
1330 ; GISEL-NEXT: v_lshrrev_b32_e32 v35, 31, v3
1331 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v26
1332 ; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v27, vcc
1333 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
1334 ; GISEL-NEXT: v_or_b32_e32 v22, v18, v20
1335 ; GISEL-NEXT: v_or_b32_e32 v23, v19, v21
1336 ; GISEL-NEXT: v_or_b32_e32 v16, v16, v0
1337 ; GISEL-NEXT: v_or_b32_e32 v20, v24, v35
1338 ; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v28, vcc
1339 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v29, vcc
1340 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v30, v20
1341 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v31, v25, vcc
1342 ; GISEL-NEXT: v_or_b32_e32 v18, v26, v28
1343 ; GISEL-NEXT: v_or_b32_e32 v19, v27, v29
1344 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v32, v16, vcc
1345 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v33, v17, vcc
1346 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
1347 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v0
1348 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1349 ; GISEL-NEXT: v_and_b32_e32 v18, v0, v8
1350 ; GISEL-NEXT: v_and_b32_e32 v19, v0, v9
1351 ; GISEL-NEXT: v_and_b32_e32 v21, v0, v10
1352 ; GISEL-NEXT: v_and_b32_e32 v35, v0, v11
1353 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
1354 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, v20, v18
1355 ; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v25, v19, vcc
1356 ; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v21, vcc
1357 ; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v35, vcc
1358 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v34
1359 ; GISEL-NEXT: v_mov_b32_e32 v19, v1
1360 ; GISEL-NEXT: v_mov_b32_e32 v18, v0
1361 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
1362 ; GISEL-NEXT: s_cbranch_execnz .LBB1_3
1363 ; GISEL-NEXT: ; %bb.4: ; %Flow13
1364 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
1365 ; GISEL-NEXT: .LBB1_5: ; %Flow14
1366 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
1367 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[22:23], 1
1368 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[2:3], 1
1369 ; GISEL-NEXT: v_lshrrev_b32_e32 v8, 31, v23
1370 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v8
1371 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v0
1372 ; GISEL-NEXT: v_or_b32_e32 v19, v19, v1
1373 ; GISEL-NEXT: .LBB1_6: ; %Flow16
1374 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
1375 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
1376 ; GISEL-NEXT: v_or_b32_e32 v0, v12, v14
1377 ; GISEL-NEXT: v_or_b32_e32 v1, v13, v15
1378 ; GISEL-NEXT: v_or_b32_e32 v8, v4, v6
1379 ; GISEL-NEXT: v_or_b32_e32 v9, v5, v7
1380 ; GISEL-NEXT: v_ffbh_u32_e32 v16, v13
1381 ; GISEL-NEXT: v_ffbh_u32_e32 v17, v12
1382 ; GISEL-NEXT: v_ffbh_u32_e32 v20, v15
1383 ; GISEL-NEXT: v_ffbh_u32_e32 v21, v14
1384 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v5
1385 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v4
1386 ; GISEL-NEXT: v_ffbh_u32_e32 v24, v7
1387 ; GISEL-NEXT: v_ffbh_u32_e32 v25, v6
1388 ; GISEL-NEXT: v_mov_b32_e32 v10, 0x7f
1389 ; GISEL-NEXT: v_mov_b32_e32 v11, 0
1390 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1391 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[8:9]
1392 ; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 32, v17
1393 ; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v21
1394 ; GISEL-NEXT: v_add_i32_e64 v8, s[6:7], 32, v23
1395 ; GISEL-NEXT: v_add_i32_e64 v9, s[6:7], 32, v25
1396 ; GISEL-NEXT: v_min_u32_e32 v0, v16, v0
1397 ; GISEL-NEXT: v_min_u32_e32 v1, v20, v1
1398 ; GISEL-NEXT: v_min_u32_e32 v8, v22, v8
1399 ; GISEL-NEXT: v_min_u32_e32 v9, v24, v9
1400 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1401 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
1402 ; GISEL-NEXT: v_add_i32_e32 v0, vcc, 64, v0
1403 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, 64, v8
1404 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
1405 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
1406 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
1407 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc
1408 ; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v1
1409 ; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, vcc
1410 ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
1411 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
1412 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[16:17], v[10:11]
1413 ; GISEL-NEXT: v_cndmask_b32_e64 v10, 0, 1, vcc
1414 ; GISEL-NEXT: v_xor_b32_e32 v8, 0x7f, v16
1415 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
1416 ; GISEL-NEXT: v_cndmask_b32_e64 v11, 0, 1, vcc
1417 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
1418 ; GISEL-NEXT: v_or_b32_e32 v9, v17, v1
1419 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
1420 ; GISEL-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
1421 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1422 ; GISEL-NEXT: v_cndmask_b32_e64 v8, 0, 1, vcc
1423 ; GISEL-NEXT: v_or_b32_e32 v9, v20, v10
1424 ; GISEL-NEXT: v_and_b32_e32 v10, 1, v9
1425 ; GISEL-NEXT: v_or_b32_e32 v8, v9, v8
1426 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v10
1427 ; GISEL-NEXT: v_cndmask_b32_e64 v10, v4, 0, vcc
1428 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v8
1429 ; GISEL-NEXT: v_cndmask_b32_e64 v11, v5, 0, vcc
1430 ; GISEL-NEXT: v_cndmask_b32_e64 v8, v6, 0, vcc
1431 ; GISEL-NEXT: v_cndmask_b32_e64 v9, v7, 0, vcc
1432 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
1433 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
1434 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
1435 ; GISEL-NEXT: s_cbranch_execz .LBB1_12
1436 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
1437 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v16
1438 ; GISEL-NEXT: v_addc_u32_e64 v11, s[4:5], 0, v17, vcc
1439 ; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
1440 ; GISEL-NEXT: v_not_b32_e32 v9, 63
1441 ; GISEL-NEXT: v_addc_u32_e64 v24, vcc, 0, v0, s[4:5]
1442 ; GISEL-NEXT: v_addc_u32_e32 v25, vcc, 0, v1, vcc
1443 ; GISEL-NEXT: v_add_i32_e64 v9, s[4:5], v26, v9
1444 ; GISEL-NEXT: v_sub_i32_e64 v10, s[4:5], 64, v26
1445 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[4:5], v26
1446 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[6:7], v26
1447 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
1448 ; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v10
1449 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[4:5], v9
1450 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
1451 ; GISEL-NEXT: v_cndmask_b32_e32 v9, 0, v0, vcc
1452 ; GISEL-NEXT: v_cndmask_b32_e32 v10, 0, v1, vcc
1453 ; GISEL-NEXT: v_or_b32_e32 v0, v20, v16
1454 ; GISEL-NEXT: v_or_b32_e32 v1, v21, v17
1455 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc
1456 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc
1457 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26
1458 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc
1459 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc
1460 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
1461 ; GISEL-NEXT: v_mov_b32_e32 v23, s11
1462 ; GISEL-NEXT: v_mov_b32_e32 v22, s10
1463 ; GISEL-NEXT: v_mov_b32_e32 v21, s9
1464 ; GISEL-NEXT: v_mov_b32_e32 v20, s8
1465 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
1466 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
1467 ; GISEL-NEXT: s_cbranch_execz .LBB1_11
1468 ; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
1469 ; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v8
1470 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v8
1471 ; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v8
1472 ; GISEL-NEXT: v_lshr_b64 v[20:21], v[4:5], v8
1473 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
1474 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, -1, v12
1475 ; GISEL-NEXT: v_addc_u32_e32 v27, vcc, -1, v13, vcc
1476 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], v22
1477 ; GISEL-NEXT: v_lshr_b64 v[6:7], v[6:7], v28
1478 ; GISEL-NEXT: v_addc_u32_e32 v28, vcc, -1, v14, vcc
1479 ; GISEL-NEXT: v_addc_u32_e32 v29, vcc, -1, v15, vcc
1480 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
1481 ; GISEL-NEXT: v_or_b32_e32 v20, v20, v22
1482 ; GISEL-NEXT: v_or_b32_e32 v21, v21, v23
1483 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v8
1484 ; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v20, vcc
1485 ; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v21, vcc
1486 ; GISEL-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc
1487 ; GISEL-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc
1488 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v8
1489 ; GISEL-NEXT: v_cndmask_b32_e32 v6, v6, v4, vcc
1490 ; GISEL-NEXT: v_cndmask_b32_e32 v7, v7, v5, vcc
1491 ; GISEL-NEXT: v_mov_b32_e32 v5, 0
1492 ; GISEL-NEXT: v_mov_b32_e32 v23, s7
1493 ; GISEL-NEXT: v_mov_b32_e32 v22, s6
1494 ; GISEL-NEXT: v_mov_b32_e32 v21, s5
1495 ; GISEL-NEXT: v_mov_b32_e32 v20, s4
1496 ; GISEL-NEXT: .LBB1_9: ; %udiv-do-while
1497 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
1498 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], 1
1499 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
1500 ; GISEL-NEXT: v_lshrrev_b32_e32 v4, 31, v7
1501 ; GISEL-NEXT: v_lshrrev_b32_e32 v30, 31, v1
1502 ; GISEL-NEXT: v_lshl_b64 v[6:7], v[9:10], 1
1503 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
1504 ; GISEL-NEXT: v_lshrrev_b32_e32 v9, 31, v10
1505 ; GISEL-NEXT: v_add_i32_e32 v8, vcc, -1, v8
1506 ; GISEL-NEXT: v_addc_u32_e32 v11, vcc, -1, v11, vcc
1507 ; GISEL-NEXT: v_or_b32_e32 v16, v16, v4
1508 ; GISEL-NEXT: v_or_b32_e32 v22, v22, v30
1509 ; GISEL-NEXT: v_or_b32_e32 v0, v0, v9
1510 ; GISEL-NEXT: v_or_b32_e32 v9, v20, v6
1511 ; GISEL-NEXT: v_or_b32_e32 v10, v21, v7
1512 ; GISEL-NEXT: v_addc_u32_e32 v24, vcc, -1, v24, vcc
1513 ; GISEL-NEXT: v_addc_u32_e32 v25, vcc, -1, v25, vcc
1514 ; GISEL-NEXT: v_sub_i32_e32 v4, vcc, v26, v22
1515 ; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v27, v23, vcc
1516 ; GISEL-NEXT: v_or_b32_e32 v6, v8, v24
1517 ; GISEL-NEXT: v_or_b32_e32 v7, v11, v25
1518 ; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v28, v16, vcc
1519 ; GISEL-NEXT: v_subb_u32_e32 v4, vcc, v29, v17, vcc
1520 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
1521 ; GISEL-NEXT: v_ashrrev_i32_e32 v6, 31, v4
1522 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
1523 ; GISEL-NEXT: v_and_b32_e32 v4, 1, v6
1524 ; GISEL-NEXT: v_and_b32_e32 v7, v6, v12
1525 ; GISEL-NEXT: v_and_b32_e32 v30, v6, v13
1526 ; GISEL-NEXT: v_and_b32_e32 v31, v6, v14
1527 ; GISEL-NEXT: v_and_b32_e32 v32, v6, v15
1528 ; GISEL-NEXT: v_mov_b32_e32 v21, v5
1529 ; GISEL-NEXT: v_mov_b32_e32 v20, v4
1530 ; GISEL-NEXT: v_sub_i32_e32 v6, vcc, v22, v7
1531 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v23, v30, vcc
1532 ; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v16, v31, vcc
1533 ; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v17, v32, vcc
1534 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
1535 ; GISEL-NEXT: s_cbranch_execnz .LBB1_9
1536 ; GISEL-NEXT: ; %bb.10: ; %Flow
1537 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
1538 ; GISEL-NEXT: .LBB1_11: ; %Flow11
1539 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
1540 ; GISEL-NEXT: v_lshl_b64 v[4:5], v[9:10], 1
1541 ; GISEL-NEXT: v_lshl_b64 v[8:9], v[0:1], 1
1542 ; GISEL-NEXT: v_lshrrev_b32_e32 v0, 31, v10
1543 ; GISEL-NEXT: v_or_b32_e32 v8, v8, v0
1544 ; GISEL-NEXT: v_or_b32_e32 v10, v20, v4
1545 ; GISEL-NEXT: v_or_b32_e32 v11, v21, v5
1546 ; GISEL-NEXT: .LBB1_12: ; %Flow12
1547 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
1548 ; GISEL-NEXT: v_mov_b32_e32 v0, v18
1549 ; GISEL-NEXT: v_mov_b32_e32 v1, v19
1550 ; GISEL-NEXT: v_mov_b32_e32 v4, v10
1551 ; GISEL-NEXT: v_mov_b32_e32 v5, v11
1552 ; GISEL-NEXT: v_mov_b32_e32 v6, v8
1553 ; GISEL-NEXT: v_mov_b32_e32 v7, v9
1554 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1555 %shl = udiv <2 x i128> %lhs, %rhs
1559 define <2 x i128> @v_srem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
1560 ; SDAG-LABEL: v_srem_v2i128_vv:
1561 ; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
1562 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1563 ; SDAG-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
1564 ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, 0, v0
1565 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
1566 ; SDAG-NEXT: v_ashrrev_i32_e32 v28, 31, v3
1567 ; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
1568 ; SDAG-NEXT: v_subb_u32_e32 v17, vcc, 0, v1, vcc
1569 ; SDAG-NEXT: v_mov_b32_e32 v29, v28
1570 ; SDAG-NEXT: v_subb_u32_e32 v18, vcc, 0, v2, vcc
1571 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[2:3]
1572 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v1, v17, s[4:5]
1573 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v0, v16, s[4:5]
1574 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, 0, v3, vcc
1575 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v2, v18, s[4:5]
1576 ; SDAG-NEXT: v_ffbh_u32_e32 v18, v16
1577 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v17
1578 ; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 0, v8
1579 ; SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5]
1580 ; SDAG-NEXT: v_or_b32_e32 v2, v16, v0
1581 ; SDAG-NEXT: v_add_i32_e64 v18, s[4:5], 32, v18
1582 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v0
1583 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, 0, v9, vcc
1584 ; SDAG-NEXT: v_or_b32_e32 v3, v17, v1
1585 ; SDAG-NEXT: v_min_u32_e32 v18, v18, v20
1586 ; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], 32, v22
1587 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v1
1588 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[10:11]
1589 ; SDAG-NEXT: v_cndmask_b32_e64 v30, v9, v23, s[4:5]
1590 ; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v10, vcc
1591 ; SDAG-NEXT: v_cndmask_b32_e64 v31, v8, v21, s[4:5]
1592 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[2:3]
1593 ; SDAG-NEXT: v_min_u32_e32 v3, v20, v22
1594 ; SDAG-NEXT: v_add_i32_e64 v8, s[8:9], 64, v18
1595 ; SDAG-NEXT: v_addc_u32_e64 v18, s[8:9], 0, 0, s[8:9]
1596 ; SDAG-NEXT: v_subb_u32_e32 v20, vcc, 0, v11, vcc
1597 ; SDAG-NEXT: v_cndmask_b32_e64 v2, v10, v9, s[4:5]
1598 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
1599 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, 0, vcc
1600 ; SDAG-NEXT: v_cndmask_b32_e32 v10, v8, v3, vcc
1601 ; SDAG-NEXT: v_ffbh_u32_e32 v9, v31
1602 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v30
1603 ; SDAG-NEXT: v_cndmask_b32_e64 v3, v11, v20, s[4:5]
1604 ; SDAG-NEXT: v_or_b32_e32 v8, v31, v2
1605 ; SDAG-NEXT: v_add_i32_e32 v11, vcc, 32, v9
1606 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v2
1607 ; SDAG-NEXT: v_or_b32_e32 v9, v30, v3
1608 ; SDAG-NEXT: v_min_u32_e32 v11, v11, v21
1609 ; SDAG-NEXT: v_add_i32_e32 v20, vcc, 32, v20
1610 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v3
1611 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
1612 ; SDAG-NEXT: v_min_u32_e32 v8, v20, v21
1613 ; SDAG-NEXT: v_add_i32_e64 v9, s[4:5], 64, v11
1614 ; SDAG-NEXT: v_addc_u32_e64 v11, s[4:5], 0, 0, s[4:5]
1615 ; SDAG-NEXT: v_cmp_ne_u64_e64 s[4:5], 0, v[2:3]
1616 ; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, 0, s[4:5]
1617 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v9, v8, s[4:5]
1618 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
1619 ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v8, v10
1620 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc
1621 ; SDAG-NEXT: v_xor_b32_e32 v8, 0x7f, v10
1622 ; SDAG-NEXT: v_subbrev_u32_e32 v18, vcc, 0, v19, vcc
1623 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
1624 ; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
1625 ; SDAG-NEXT: v_subbrev_u32_e32 v19, vcc, 0, v19, vcc
1626 ; SDAG-NEXT: v_or_b32_e32 v8, v8, v18
1627 ; SDAG-NEXT: v_or_b32_e32 v9, v11, v19
1628 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
1629 ; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
1630 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[8:9]
1631 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
1632 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v21, v20, s[4:5]
1633 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
1634 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v8
1635 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
1636 ; SDAG-NEXT: v_cndmask_b32_e64 v35, v1, 0, s[4:5]
1637 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
1638 ; SDAG-NEXT: v_cndmask_b32_e64 v32, v0, 0, s[4:5]
1639 ; SDAG-NEXT: v_cndmask_b32_e64 v27, v17, 0, s[4:5]
1640 ; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc
1641 ; SDAG-NEXT: v_cndmask_b32_e64 v33, v16, 0, s[4:5]
1642 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
1643 ; SDAG-NEXT: s_cbranch_execz .LBB2_6
1644 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
1645 ; SDAG-NEXT: v_add_i32_e32 v32, vcc, 1, v10
1646 ; SDAG-NEXT: v_sub_i32_e64 v20, s[4:5], 63, v10
1647 ; SDAG-NEXT: v_mov_b32_e32 v8, 0
1648 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
1649 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v11, vcc
1650 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[16:17], v20
1651 ; SDAG-NEXT: v_addc_u32_e32 v34, vcc, 0, v18, vcc
1652 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v19, vcc
1653 ; SDAG-NEXT: v_or_b32_e32 v18, v32, v34
1654 ; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v10
1655 ; SDAG-NEXT: v_or_b32_e32 v19, v33, v35
1656 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[0:1], v24
1657 ; SDAG-NEXT: v_sub_i32_e32 v25, vcc, 64, v24
1658 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[16:17], v24
1659 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
1660 ; SDAG-NEXT: v_lshr_b64 v[18:19], v[16:17], v25
1661 ; SDAG-NEXT: v_or_b32_e32 v11, v11, v19
1662 ; SDAG-NEXT: v_or_b32_e32 v10, v10, v18
1663 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24
1664 ; SDAG-NEXT: v_cndmask_b32_e64 v11, v21, v11, s[4:5]
1665 ; SDAG-NEXT: v_cndmask_b32_e64 v10, v20, v10, s[4:5]
1666 ; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v23, s[4:5]
1667 ; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v22, s[4:5]
1668 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v24
1669 ; SDAG-NEXT: v_cndmask_b32_e64 v11, v11, v1, s[4:5]
1670 ; SDAG-NEXT: v_cndmask_b32_e64 v10, v10, v0, s[4:5]
1671 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
1672 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
1673 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
1674 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
1675 ; SDAG-NEXT: s_cbranch_execz .LBB2_5
1676 ; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
1677 ; SDAG-NEXT: v_lshr_b64 v[8:9], v[16:17], v32
1678 ; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 64, v32
1679 ; SDAG-NEXT: v_subrev_i32_e32 v37, vcc, 64, v32
1680 ; SDAG-NEXT: v_lshr_b64 v[24:25], v[0:1], v32
1681 ; SDAG-NEXT: v_add_i32_e32 v36, vcc, -1, v31
1682 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
1683 ; SDAG-NEXT: v_mov_b32_e32 v22, 0
1684 ; SDAG-NEXT: v_mov_b32_e32 v23, 0
1685 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
1686 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
1687 ; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v26
1688 ; SDAG-NEXT: v_lshr_b64 v[48:49], v[0:1], v37
1689 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v30, vcc
1690 ; SDAG-NEXT: v_or_b32_e32 v9, v9, v27
1691 ; SDAG-NEXT: v_or_b32_e32 v8, v8, v26
1692 ; SDAG-NEXT: v_addc_u32_e32 v38, vcc, -1, v2, vcc
1693 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v32
1694 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v49, v9, s[4:5]
1695 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v48, v8, s[4:5]
1696 ; SDAG-NEXT: v_cndmask_b32_e64 v27, 0, v25, s[4:5]
1697 ; SDAG-NEXT: v_cndmask_b32_e64 v26, 0, v24, s[4:5]
1698 ; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v3, vcc
1699 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v32
1700 ; SDAG-NEXT: v_cndmask_b32_e32 v25, v9, v17, vcc
1701 ; SDAG-NEXT: v_cndmask_b32_e32 v24, v8, v16, vcc
1702 ; SDAG-NEXT: v_mov_b32_e32 v9, 0
1703 ; SDAG-NEXT: .LBB2_3: ; %udiv-do-while3
1704 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1705 ; SDAG-NEXT: v_lshrrev_b32_e32 v8, 31, v21
1706 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
1707 ; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
1708 ; SDAG-NEXT: v_lshrrev_b32_e32 v48, 31, v25
1709 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
1710 ; SDAG-NEXT: v_lshrrev_b32_e32 v49, 31, v11
1711 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
1712 ; SDAG-NEXT: v_or_b32_e32 v21, v23, v21
1713 ; SDAG-NEXT: v_or_b32_e32 v20, v22, v20
1714 ; SDAG-NEXT: v_or_b32_e32 v22, v26, v48
1715 ; SDAG-NEXT: v_or_b32_e32 v23, v24, v49
1716 ; SDAG-NEXT: v_or_b32_e32 v10, v10, v8
1717 ; SDAG-NEXT: v_sub_i32_e32 v8, vcc, v36, v23
1718 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v37, v25, vcc
1719 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v38, v22, vcc
1720 ; SDAG-NEXT: v_subb_u32_e32 v8, vcc, v39, v27, vcc
1721 ; SDAG-NEXT: v_ashrrev_i32_e32 v8, 31, v8
1722 ; SDAG-NEXT: v_and_b32_e32 v24, v8, v31
1723 ; SDAG-NEXT: v_and_b32_e32 v26, v8, v30
1724 ; SDAG-NEXT: v_and_b32_e32 v48, v8, v2
1725 ; SDAG-NEXT: v_and_b32_e32 v49, v8, v3
1726 ; SDAG-NEXT: v_and_b32_e32 v8, 1, v8
1727 ; SDAG-NEXT: v_sub_i32_e32 v24, vcc, v23, v24
1728 ; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v25, v26, vcc
1729 ; SDAG-NEXT: v_subb_u32_e32 v26, vcc, v22, v48, vcc
1730 ; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v49, vcc
1731 ; SDAG-NEXT: v_add_i32_e32 v32, vcc, -1, v32
1732 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
1733 ; SDAG-NEXT: v_addc_u32_e32 v34, vcc, -1, v34, vcc
1734 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc
1735 ; SDAG-NEXT: v_or_b32_e32 v22, v32, v34
1736 ; SDAG-NEXT: v_or_b32_e32 v23, v33, v35
1737 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[22:23]
1738 ; SDAG-NEXT: v_or_b32_e32 v11, v19, v11
1739 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
1740 ; SDAG-NEXT: v_or_b32_e32 v10, v18, v10
1741 ; SDAG-NEXT: v_mov_b32_e32 v23, v9
1742 ; SDAG-NEXT: v_mov_b32_e32 v22, v8
1743 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
1744 ; SDAG-NEXT: s_cbranch_execnz .LBB2_3
1745 ; SDAG-NEXT: ; %bb.4: ; %Flow13
1746 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
1747 ; SDAG-NEXT: .LBB2_5: ; %Flow14
1748 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
1749 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
1750 ; SDAG-NEXT: v_lshrrev_b32_e32 v22, 31, v21
1751 ; SDAG-NEXT: v_or_b32_e32 v10, v10, v22
1752 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
1753 ; SDAG-NEXT: v_or_b32_e32 v35, v19, v11
1754 ; SDAG-NEXT: v_or_b32_e32 v32, v18, v10
1755 ; SDAG-NEXT: v_or_b32_e32 v27, v9, v21
1756 ; SDAG-NEXT: v_or_b32_e32 v33, v8, v20
1757 ; SDAG-NEXT: .LBB2_6: ; %Flow16
1758 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
1759 ; SDAG-NEXT: v_ashrrev_i32_e32 v26, 31, v7
1760 ; SDAG-NEXT: v_sub_i32_e32 v8, vcc, 0, v4
1761 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
1762 ; SDAG-NEXT: s_mov_b64 s[10:11], 0x7f
1763 ; SDAG-NEXT: v_mov_b32_e32 v34, v26
1764 ; SDAG-NEXT: v_subb_u32_e32 v9, vcc, 0, v5, vcc
1765 ; SDAG-NEXT: v_subb_u32_e32 v10, vcc, 0, v6, vcc
1766 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[6:7]
1767 ; SDAG-NEXT: v_cndmask_b32_e64 v9, v5, v9, s[4:5]
1768 ; SDAG-NEXT: v_cndmask_b32_e64 v8, v4, v8, s[4:5]
1769 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, 0, v7, vcc
1770 ; SDAG-NEXT: v_cndmask_b32_e64 v4, v6, v10, s[4:5]
1771 ; SDAG-NEXT: v_ffbh_u32_e32 v10, v8
1772 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v9
1773 ; SDAG-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5]
1774 ; SDAG-NEXT: v_sub_i32_e32 v19, vcc, 0, v12
1775 ; SDAG-NEXT: v_or_b32_e32 v6, v8, v4
1776 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v4
1777 ; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], 32, v10
1778 ; SDAG-NEXT: v_subb_u32_e32 v21, vcc, 0, v13, vcc
1779 ; SDAG-NEXT: v_or_b32_e32 v7, v9, v5
1780 ; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], 32, v20
1781 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v5
1782 ; SDAG-NEXT: v_min_u32_e32 v10, v10, v11
1783 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, 0, v14, vcc
1784 ; SDAG-NEXT: v_cmp_gt_i64_e64 s[4:5], 0, v[14:15]
1785 ; SDAG-NEXT: v_cndmask_b32_e64 v36, v13, v21, s[4:5]
1786 ; SDAG-NEXT: v_cndmask_b32_e64 v37, v12, v19, s[4:5]
1787 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[6:7]
1788 ; SDAG-NEXT: v_min_u32_e32 v7, v20, v22
1789 ; SDAG-NEXT: v_add_i32_e64 v10, s[8:9], 64, v10
1790 ; SDAG-NEXT: v_addc_u32_e64 v12, s[8:9], 0, 0, s[8:9]
1791 ; SDAG-NEXT: v_subb_u32_e32 v13, vcc, 0, v15, vcc
1792 ; SDAG-NEXT: v_cndmask_b32_e64 v6, v14, v11, s[4:5]
1793 ; SDAG-NEXT: v_ffbh_u32_e32 v11, v37
1794 ; SDAG-NEXT: v_ffbh_u32_e32 v14, v36
1795 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
1796 ; SDAG-NEXT: v_cndmask_b32_e64 v12, v12, 0, vcc
1797 ; SDAG-NEXT: v_cndmask_b32_e32 v19, v10, v7, vcc
1798 ; SDAG-NEXT: v_cndmask_b32_e64 v7, v15, v13, s[4:5]
1799 ; SDAG-NEXT: v_or_b32_e32 v10, v37, v6
1800 ; SDAG-NEXT: v_ffbh_u32_e32 v13, v6
1801 ; SDAG-NEXT: v_add_i32_e32 v15, vcc, 32, v11
1802 ; SDAG-NEXT: v_or_b32_e32 v11, v36, v7
1803 ; SDAG-NEXT: v_add_i32_e32 v13, vcc, 32, v13
1804 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v7
1805 ; SDAG-NEXT: v_min_u32_e32 v14, v15, v14
1806 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
1807 ; SDAG-NEXT: v_min_u32_e32 v10, v13, v20
1808 ; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], 64, v14
1809 ; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
1810 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
1811 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
1812 ; SDAG-NEXT: v_cndmask_b32_e64 v13, v13, 0, vcc
1813 ; SDAG-NEXT: v_cndmask_b32_e32 v10, v11, v10, vcc
1814 ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, v10, v19
1815 ; SDAG-NEXT: v_subb_u32_e32 v11, vcc, v13, v12, vcc
1816 ; SDAG-NEXT: v_xor_b32_e32 v14, 0x7f, v10
1817 ; SDAG-NEXT: v_subbrev_u32_e32 v12, vcc, 0, v18, vcc
1818 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[10:11], v[10:11]
1819 ; SDAG-NEXT: v_cndmask_b32_e64 v19, 0, 1, s[4:5]
1820 ; SDAG-NEXT: v_subbrev_u32_e32 v13, vcc, 0, v18, vcc
1821 ; SDAG-NEXT: v_or_b32_e32 v14, v14, v12
1822 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[12:13]
1823 ; SDAG-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
1824 ; SDAG-NEXT: v_or_b32_e32 v15, v11, v13
1825 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[12:13]
1826 ; SDAG-NEXT: v_cndmask_b32_e32 v18, v18, v19, vcc
1827 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
1828 ; SDAG-NEXT: v_and_b32_e32 v14, 1, v18
1829 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v14
1830 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
1831 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5]
1832 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
1833 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5]
1834 ; SDAG-NEXT: v_cndmask_b32_e64 v15, v9, 0, s[4:5]
1835 ; SDAG-NEXT: v_cndmask_b32_e64 v14, v8, 0, s[4:5]
1836 ; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
1837 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
1838 ; SDAG-NEXT: s_cbranch_execz .LBB2_12
1839 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
1840 ; SDAG-NEXT: v_add_i32_e32 v38, vcc, 1, v10
1841 ; SDAG-NEXT: v_sub_i32_e64 v18, s[4:5], 63, v10
1842 ; SDAG-NEXT: v_mov_b32_e32 v14, 0
1843 ; SDAG-NEXT: v_mov_b32_e32 v15, 0
1844 ; SDAG-NEXT: v_addc_u32_e32 v39, vcc, 0, v11, vcc
1845 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[8:9], v18
1846 ; SDAG-NEXT: v_addc_u32_e32 v48, vcc, 0, v12, vcc
1847 ; SDAG-NEXT: v_addc_u32_e32 v49, vcc, 0, v13, vcc
1848 ; SDAG-NEXT: v_or_b32_e32 v11, v38, v48
1849 ; SDAG-NEXT: v_sub_i32_e32 v13, vcc, 0x7f, v10
1850 ; SDAG-NEXT: v_or_b32_e32 v12, v39, v49
1851 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[4:5], v13
1852 ; SDAG-NEXT: v_sub_i32_e32 v10, vcc, 64, v13
1853 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[8:9], v13
1854 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[11:12]
1855 ; SDAG-NEXT: v_lshr_b64 v[10:11], v[8:9], v10
1856 ; SDAG-NEXT: v_or_b32_e32 v11, v21, v11
1857 ; SDAG-NEXT: v_or_b32_e32 v10, v20, v10
1858 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v13
1859 ; SDAG-NEXT: v_cndmask_b32_e64 v12, v19, v11, s[4:5]
1860 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v10, s[4:5]
1861 ; SDAG-NEXT: v_cndmask_b32_e64 v11, 0, v23, s[4:5]
1862 ; SDAG-NEXT: v_cndmask_b32_e64 v10, 0, v22, s[4:5]
1863 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13
1864 ; SDAG-NEXT: v_cndmask_b32_e64 v13, v12, v5, s[4:5]
1865 ; SDAG-NEXT: v_cndmask_b32_e64 v12, v18, v4, s[4:5]
1866 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
1867 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
1868 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
1869 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
1870 ; SDAG-NEXT: s_cbranch_execz .LBB2_11
1871 ; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
1872 ; SDAG-NEXT: v_lshr_b64 v[14:15], v[8:9], v38
1873 ; SDAG-NEXT: v_sub_i32_e32 v24, vcc, 64, v38
1874 ; SDAG-NEXT: v_subrev_i32_e32 v51, vcc, 64, v38
1875 ; SDAG-NEXT: v_lshr_b64 v[22:23], v[4:5], v38
1876 ; SDAG-NEXT: v_add_i32_e32 v50, vcc, -1, v37
1877 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
1878 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
1879 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
1880 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
1881 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
1882 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v24
1883 ; SDAG-NEXT: v_lshr_b64 v[53:54], v[4:5], v51
1884 ; SDAG-NEXT: v_addc_u32_e32 v51, vcc, -1, v36, vcc
1885 ; SDAG-NEXT: v_or_b32_e32 v15, v15, v25
1886 ; SDAG-NEXT: v_or_b32_e32 v14, v14, v24
1887 ; SDAG-NEXT: v_addc_u32_e32 v52, vcc, -1, v6, vcc
1888 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v38
1889 ; SDAG-NEXT: v_cndmask_b32_e64 v15, v54, v15, s[4:5]
1890 ; SDAG-NEXT: v_cndmask_b32_e64 v14, v53, v14, s[4:5]
1891 ; SDAG-NEXT: v_cndmask_b32_e64 v25, 0, v23, s[4:5]
1892 ; SDAG-NEXT: v_cndmask_b32_e64 v24, 0, v22, s[4:5]
1893 ; SDAG-NEXT: v_addc_u32_e32 v53, vcc, -1, v7, vcc
1894 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v38
1895 ; SDAG-NEXT: v_cndmask_b32_e32 v23, v15, v9, vcc
1896 ; SDAG-NEXT: v_cndmask_b32_e32 v22, v14, v8, vcc
1897 ; SDAG-NEXT: v_mov_b32_e32 v15, 0
1898 ; SDAG-NEXT: .LBB2_9: ; %udiv-do-while
1899 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
1900 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
1901 ; SDAG-NEXT: v_lshrrev_b32_e32 v14, 31, v23
1902 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
1903 ; SDAG-NEXT: v_lshrrev_b32_e32 v54, 31, v13
1904 ; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1
1905 ; SDAG-NEXT: v_lshrrev_b32_e32 v55, 31, v11
1906 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
1907 ; SDAG-NEXT: v_or_b32_e32 v24, v24, v14
1908 ; SDAG-NEXT: v_or_b32_e32 v22, v22, v54
1909 ; SDAG-NEXT: v_or_b32_e32 v12, v12, v55
1910 ; SDAG-NEXT: v_or_b32_e32 v13, v19, v13
1911 ; SDAG-NEXT: v_or_b32_e32 v11, v21, v11
1912 ; SDAG-NEXT: v_or_b32_e32 v12, v18, v12
1913 ; SDAG-NEXT: v_sub_i32_e32 v14, vcc, v50, v22
1914 ; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v51, v23, vcc
1915 ; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v52, v24, vcc
1916 ; SDAG-NEXT: v_subb_u32_e32 v14, vcc, v53, v25, vcc
1917 ; SDAG-NEXT: v_ashrrev_i32_e32 v21, 31, v14
1918 ; SDAG-NEXT: v_and_b32_e32 v14, 1, v21
1919 ; SDAG-NEXT: v_and_b32_e32 v54, v21, v7
1920 ; SDAG-NEXT: v_and_b32_e32 v55, v21, v6
1921 ; SDAG-NEXT: v_and_b32_e32 v40, v21, v36
1922 ; SDAG-NEXT: v_and_b32_e32 v21, v21, v37
1923 ; SDAG-NEXT: v_sub_i32_e32 v22, vcc, v22, v21
1924 ; SDAG-NEXT: v_subb_u32_e32 v23, vcc, v23, v40, vcc
1925 ; SDAG-NEXT: v_subb_u32_e32 v24, vcc, v24, v55, vcc
1926 ; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v25, v54, vcc
1927 ; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v38
1928 ; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v39, vcc
1929 ; SDAG-NEXT: v_addc_u32_e32 v48, vcc, -1, v48, vcc
1930 ; SDAG-NEXT: v_addc_u32_e32 v49, vcc, -1, v49, vcc
1931 ; SDAG-NEXT: v_or_b32_e32 v55, v39, v49
1932 ; SDAG-NEXT: v_or_b32_e32 v54, v38, v48
1933 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[54:55]
1934 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
1935 ; SDAG-NEXT: v_or_b32_e32 v10, v20, v10
1936 ; SDAG-NEXT: v_mov_b32_e32 v21, v15
1937 ; SDAG-NEXT: v_mov_b32_e32 v20, v14
1938 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
1939 ; SDAG-NEXT: s_cbranch_execnz .LBB2_9
1940 ; SDAG-NEXT: ; %bb.10: ; %Flow
1941 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
1942 ; SDAG-NEXT: .LBB2_11: ; %Flow11
1943 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
1944 ; SDAG-NEXT: v_lshl_b64 v[12:13], v[12:13], 1
1945 ; SDAG-NEXT: v_lshrrev_b32_e32 v20, 31, v11
1946 ; SDAG-NEXT: v_lshl_b64 v[10:11], v[10:11], 1
1947 ; SDAG-NEXT: v_or_b32_e32 v12, v12, v20
1948 ; SDAG-NEXT: v_or_b32_e32 v19, v19, v13
1949 ; SDAG-NEXT: v_or_b32_e32 v15, v15, v11
1950 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v12
1951 ; SDAG-NEXT: v_or_b32_e32 v14, v14, v10
1952 ; SDAG-NEXT: .LBB2_12: ; %Flow12
1953 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
1954 ; SDAG-NEXT: v_mul_lo_u32 v12, v33, v3
1955 ; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v33, v2, 0
1956 ; SDAG-NEXT: v_mul_lo_u32 v24, v27, v2
1957 ; SDAG-NEXT: v_mul_lo_u32 v25, v35, v31
1958 ; SDAG-NEXT: v_mul_lo_u32 v35, v32, v30
1959 ; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v33, 0
1960 ; SDAG-NEXT: v_mov_b32_e32 v13, 0
1961 ; SDAG-NEXT: v_mul_lo_u32 v38, v14, v7
1962 ; SDAG-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v14, v6, 0
1963 ; SDAG-NEXT: v_mul_lo_u32 v39, v15, v6
1964 ; SDAG-NEXT: v_mul_lo_u32 v48, v19, v37
1965 ; SDAG-NEXT: v_mul_lo_u32 v49, v18, v36
1966 ; SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v37, v14, 0
1967 ; SDAG-NEXT: v_add_i32_e32 v11, vcc, v11, v12
1968 ; SDAG-NEXT: v_mov_b32_e32 v12, v3
1969 ; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v30, v33, v[12:13]
1970 ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v2
1971 ; SDAG-NEXT: v_add_i32_e64 v19, s[4:5], v21, v38
1972 ; SDAG-NEXT: v_add_i32_e64 v11, s[4:5], v11, v24
1973 ; SDAG-NEXT: v_mov_b32_e32 v12, v22
1974 ; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v31, v27, v[12:13]
1975 ; SDAG-NEXT: v_xor_b32_e32 v24, v16, v28
1976 ; SDAG-NEXT: v_add_i32_e64 v21, s[4:5], v19, v39
1977 ; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v32, v31, v[10:11]
1978 ; SDAG-NEXT: v_add_i32_e64 v22, s[4:5], v23, v3
1979 ; SDAG-NEXT: v_addc_u32_e64 v23, s[4:5], 0, 0, s[4:5]
1980 ; SDAG-NEXT: v_subb_u32_e32 v31, vcc, v17, v2, vcc
1981 ; SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v18, v37, v[20:21]
1982 ; SDAG-NEXT: v_mov_b32_e32 v12, v7
1983 ; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v36, v14, v[12:13]
1984 ; SDAG-NEXT: v_add_i32_e64 v7, s[4:5], v25, v11
1985 ; SDAG-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v30, v27, v[22:23]
1986 ; SDAG-NEXT: v_xor_b32_e32 v14, v31, v29
1987 ; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v48, v3
1988 ; SDAG-NEXT: v_mov_b32_e32 v12, v16
1989 ; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v37, v15, v[12:13]
1990 ; SDAG-NEXT: v_add_i32_e64 v7, s[4:5], v35, v7
1991 ; SDAG-NEXT: v_add_i32_e64 v3, s[4:5], v49, v3
1992 ; SDAG-NEXT: v_add_i32_e64 v12, s[4:5], v17, v12
1993 ; SDAG-NEXT: v_addc_u32_e64 v13, s[4:5], 0, 0, s[4:5]
1994 ; SDAG-NEXT: v_add_i32_e64 v10, s[4:5], v18, v10
1995 ; SDAG-NEXT: v_addc_u32_e64 v7, s[4:5], v19, v7, s[4:5]
1996 ; SDAG-NEXT: v_subb_u32_e32 v0, vcc, v0, v10, vcc
1997 ; SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v36, v15, v[12:13]
1998 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v7, vcc
1999 ; SDAG-NEXT: v_xor_b32_e32 v7, v0, v28
2000 ; SDAG-NEXT: v_add_i32_e32 v10, vcc, v12, v2
2001 ; SDAG-NEXT: v_addc_u32_e32 v12, vcc, v13, v3, vcc
2002 ; SDAG-NEXT: v_xor_b32_e32 v3, v1, v29
2003 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v24, v28
2004 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v14, v29, vcc
2005 ; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v7, v28, vcc
2006 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v29, vcc
2007 ; SDAG-NEXT: v_sub_i32_e32 v6, vcc, v8, v6
2008 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v11, vcc
2009 ; SDAG-NEXT: v_xor_b32_e32 v6, v6, v26
2010 ; SDAG-NEXT: v_subb_u32_e32 v4, vcc, v4, v10, vcc
2011 ; SDAG-NEXT: v_xor_b32_e32 v7, v7, v34
2012 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v12, vcc
2013 ; SDAG-NEXT: v_xor_b32_e32 v8, v4, v26
2014 ; SDAG-NEXT: v_xor_b32_e32 v9, v5, v34
2015 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v6, v26
2016 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v7, v34, vcc
2017 ; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v8, v26, vcc
2018 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v9, v34, vcc
2019 ; SDAG-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
2020 ; SDAG-NEXT: s_waitcnt vmcnt(0)
2021 ; SDAG-NEXT: s_setpc_b64 s[30:31]
2023 ; GISEL-LABEL: v_srem_v2i128_vv:
2024 ; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2025 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2026 ; GISEL-NEXT: v_ashrrev_i32_e32 v28, 31, v3
2027 ; GISEL-NEXT: v_ashrrev_i32_e32 v18, 31, v11
2028 ; GISEL-NEXT: v_mov_b32_e32 v19, 0x7f
2029 ; GISEL-NEXT: v_mov_b32_e32 v20, 0
2030 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
2031 ; GISEL-NEXT: v_xor_b32_e32 v0, v0, v28
2032 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v28
2033 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v28
2034 ; GISEL-NEXT: v_xor_b32_e32 v3, v3, v28
2035 ; GISEL-NEXT: v_xor_b32_e32 v8, v8, v18
2036 ; GISEL-NEXT: v_xor_b32_e32 v9, v9, v18
2037 ; GISEL-NEXT: v_xor_b32_e32 v10, v10, v18
2038 ; GISEL-NEXT: v_xor_b32_e32 v11, v11, v18
2039 ; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v0, v28
2040 ; GISEL-NEXT: v_subb_u32_e32 v17, vcc, v1, v28, vcc
2041 ; GISEL-NEXT: v_sub_i32_e64 v30, s[4:5], v8, v18
2042 ; GISEL-NEXT: v_subb_u32_e64 v29, s[4:5], v9, v18, s[4:5]
2043 ; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v2, v28, vcc
2044 ; GISEL-NEXT: v_subb_u32_e32 v9, vcc, v3, v28, vcc
2045 ; GISEL-NEXT: v_subb_u32_e64 v10, vcc, v10, v18, s[4:5]
2046 ; GISEL-NEXT: v_subb_u32_e32 v11, vcc, v11, v18, vcc
2047 ; GISEL-NEXT: v_ffbh_u32_e32 v18, v29
2048 ; GISEL-NEXT: v_ffbh_u32_e32 v21, v30
2049 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v17
2050 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v16
2051 ; GISEL-NEXT: v_or_b32_e32 v0, v30, v10
2052 ; GISEL-NEXT: v_or_b32_e32 v1, v29, v11
2053 ; GISEL-NEXT: v_or_b32_e32 v2, v16, v8
2054 ; GISEL-NEXT: v_or_b32_e32 v3, v17, v9
2055 ; GISEL-NEXT: v_add_i32_e32 v21, vcc, 32, v21
2056 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, 32, v23
2057 ; GISEL-NEXT: v_ffbh_u32_e32 v24, v10
2058 ; GISEL-NEXT: v_ffbh_u32_e32 v25, v11
2059 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v8
2060 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v9
2061 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2062 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[2:3]
2063 ; GISEL-NEXT: v_min_u32_e32 v0, v18, v21
2064 ; GISEL-NEXT: v_min_u32_e32 v1, v22, v23
2065 ; GISEL-NEXT: v_add_i32_e64 v2, s[6:7], 32, v24
2066 ; GISEL-NEXT: v_add_i32_e64 v3, s[6:7], 32, v26
2067 ; GISEL-NEXT: v_min_u32_e32 v2, v25, v2
2068 ; GISEL-NEXT: v_min_u32_e32 v3, v27, v3
2069 ; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0
2070 ; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 64, v1
2071 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2072 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, s[4:5]
2073 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
2074 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc
2075 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[8:9]
2076 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc
2077 ; GISEL-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
2078 ; GISEL-NEXT: v_subb_u32_e64 v3, s[4:5], 0, 0, vcc
2079 ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
2080 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
2081 ; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v2
2082 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[19:20]
2083 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2084 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v0
2085 ; GISEL-NEXT: v_or_b32_e32 v19, v3, v1
2086 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
2087 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
2088 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2089 ; GISEL-NEXT: v_cndmask_b32_e32 v20, v22, v20, vcc
2090 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
2091 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
2092 ; GISEL-NEXT: v_or_b32_e32 v19, v21, v20
2093 ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18
2094 ; GISEL-NEXT: v_and_b32_e32 v19, 1, v19
2095 ; GISEL-NEXT: v_and_b32_e32 v18, 1, v18
2096 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v19
2097 ; GISEL-NEXT: v_cndmask_b32_e64 v31, v16, 0, vcc
2098 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v18
2099 ; GISEL-NEXT: v_cndmask_b32_e64 v18, v8, 0, vcc
2100 ; GISEL-NEXT: v_cndmask_b32_e64 v19, v9, 0, vcc
2101 ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
2102 ; GISEL-NEXT: v_cndmask_b32_e64 v32, v17, 0, vcc
2103 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
2104 ; GISEL-NEXT: s_cbranch_execz .LBB2_6
2105 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
2106 ; GISEL-NEXT: v_add_i32_e32 v31, vcc, 1, v2
2107 ; GISEL-NEXT: v_addc_u32_e64 v32, s[4:5], 0, v3, vcc
2108 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v2
2109 ; GISEL-NEXT: v_not_b32_e32 v2, 63
2110 ; GISEL-NEXT: v_addc_u32_e64 v33, vcc, 0, v0, s[4:5]
2111 ; GISEL-NEXT: v_addc_u32_e32 v34, vcc, 0, v1, vcc
2112 ; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
2113 ; GISEL-NEXT: v_sub_i32_e64 v18, s[4:5], 64, v24
2114 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[16:17], v24
2115 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[8:9], v24
2116 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
2117 ; GISEL-NEXT: v_lshr_b64 v[18:19], v[16:17], v18
2118 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[16:17], v20
2119 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
2120 ; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc
2121 ; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc
2122 ; GISEL-NEXT: v_or_b32_e32 v0, v18, v2
2123 ; GISEL-NEXT: v_or_b32_e32 v1, v19, v3
2124 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc
2125 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc
2126 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
2127 ; GISEL-NEXT: v_cndmask_b32_e32 v18, v0, v8, vcc
2128 ; GISEL-NEXT: v_cndmask_b32_e32 v19, v1, v9, vcc
2129 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
2130 ; GISEL-NEXT: v_mov_b32_e32 v0, s8
2131 ; GISEL-NEXT: v_mov_b32_e32 v1, s9
2132 ; GISEL-NEXT: v_mov_b32_e32 v2, s10
2133 ; GISEL-NEXT: v_mov_b32_e32 v3, s11
2134 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
2135 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
2136 ; GISEL-NEXT: s_cbranch_execz .LBB2_5
2137 ; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
2138 ; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v31
2139 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v31
2140 ; GISEL-NEXT: v_lshr_b64 v[0:1], v[8:9], v31
2141 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[16:17], v31
2142 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
2143 ; GISEL-NEXT: v_add_i32_e32 v35, vcc, -1, v30
2144 ; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v29, vcc
2145 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[8:9], v22
2146 ; GISEL-NEXT: v_lshr_b64 v[24:25], v[8:9], v24
2147 ; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v10, vcc
2148 ; GISEL-NEXT: v_addc_u32_e32 v38, vcc, -1, v11, vcc
2149 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
2150 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v22
2151 ; GISEL-NEXT: v_or_b32_e32 v3, v3, v23
2152 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v31
2153 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc
2154 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc
2155 ; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v0, vcc
2156 ; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v1, vcc
2157 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v31
2158 ; GISEL-NEXT: v_cndmask_b32_e32 v26, v2, v16, vcc
2159 ; GISEL-NEXT: v_cndmask_b32_e32 v27, v3, v17, vcc
2160 ; GISEL-NEXT: v_mov_b32_e32 v23, 0
2161 ; GISEL-NEXT: v_mov_b32_e32 v0, s4
2162 ; GISEL-NEXT: v_mov_b32_e32 v1, s5
2163 ; GISEL-NEXT: v_mov_b32_e32 v2, s6
2164 ; GISEL-NEXT: v_mov_b32_e32 v3, s7
2165 ; GISEL-NEXT: .LBB2_3: ; %udiv-do-while3
2166 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
2167 ; GISEL-NEXT: v_lshrrev_b32_e32 v39, 31, v21
2168 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1
2169 ; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v27
2170 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
2171 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[24:25], 1
2172 ; GISEL-NEXT: v_lshrrev_b32_e32 v48, 31, v19
2173 ; GISEL-NEXT: v_add_i32_e32 v31, vcc, -1, v31
2174 ; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
2175 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
2176 ; GISEL-NEXT: v_or_b32_e32 v20, v0, v2
2177 ; GISEL-NEXT: v_or_b32_e32 v21, v1, v3
2178 ; GISEL-NEXT: v_or_b32_e32 v2, v24, v22
2179 ; GISEL-NEXT: v_or_b32_e32 v3, v26, v48
2180 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
2181 ; GISEL-NEXT: v_addc_u32_e32 v34, vcc, -1, v34, vcc
2182 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v35, v3
2183 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v36, v27, vcc
2184 ; GISEL-NEXT: v_or_b32_e32 v0, v31, v33
2185 ; GISEL-NEXT: v_or_b32_e32 v1, v32, v34
2186 ; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v37, v2, vcc
2187 ; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v38, v25, vcc
2188 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2189 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v22
2190 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2191 ; GISEL-NEXT: v_and_b32_e32 v1, v0, v30
2192 ; GISEL-NEXT: v_and_b32_e32 v24, v0, v29
2193 ; GISEL-NEXT: v_and_b32_e32 v48, v0, v10
2194 ; GISEL-NEXT: v_and_b32_e32 v49, v0, v11
2195 ; GISEL-NEXT: v_and_b32_e32 v22, 1, v0
2196 ; GISEL-NEXT: v_sub_i32_e32 v26, vcc, v3, v1
2197 ; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v24, vcc
2198 ; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v2, v48, vcc
2199 ; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v25, v49, vcc
2200 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v39
2201 ; GISEL-NEXT: v_mov_b32_e32 v0, v22
2202 ; GISEL-NEXT: v_mov_b32_e32 v1, v23
2203 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
2204 ; GISEL-NEXT: s_cbranch_execnz .LBB2_3
2205 ; GISEL-NEXT: ; %bb.4: ; %Flow13
2206 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
2207 ; GISEL-NEXT: .LBB2_5: ; %Flow14
2208 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
2209 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1
2210 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
2211 ; GISEL-NEXT: v_lshrrev_b32_e32 v20, 31, v21
2212 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v20
2213 ; GISEL-NEXT: v_or_b32_e32 v31, v0, v2
2214 ; GISEL-NEXT: v_or_b32_e32 v32, v1, v3
2215 ; GISEL-NEXT: .LBB2_6: ; %Flow16
2216 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
2217 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
2218 ; GISEL-NEXT: v_ashrrev_i32_e32 v33, 31, v7
2219 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v15
2220 ; GISEL-NEXT: v_mov_b32_e32 v2, 0x7f
2221 ; GISEL-NEXT: v_mov_b32_e32 v3, 0
2222 ; GISEL-NEXT: v_xor_b32_e32 v1, v4, v33
2223 ; GISEL-NEXT: v_xor_b32_e32 v4, v5, v33
2224 ; GISEL-NEXT: v_xor_b32_e32 v5, v6, v33
2225 ; GISEL-NEXT: v_xor_b32_e32 v7, v7, v33
2226 ; GISEL-NEXT: v_xor_b32_e32 v6, v12, v0
2227 ; GISEL-NEXT: v_xor_b32_e32 v20, v13, v0
2228 ; GISEL-NEXT: v_xor_b32_e32 v14, v14, v0
2229 ; GISEL-NEXT: v_xor_b32_e32 v15, v15, v0
2230 ; GISEL-NEXT: v_sub_i32_e32 v12, vcc, v1, v33
2231 ; GISEL-NEXT: v_subb_u32_e32 v13, vcc, v4, v33, vcc
2232 ; GISEL-NEXT: v_sub_i32_e64 v35, s[4:5], v6, v0
2233 ; GISEL-NEXT: v_subb_u32_e64 v34, s[4:5], v20, v0, s[4:5]
2234 ; GISEL-NEXT: v_subb_u32_e32 v6, vcc, v5, v33, vcc
2235 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v33, vcc
2236 ; GISEL-NEXT: v_subb_u32_e64 v4, vcc, v14, v0, s[4:5]
2237 ; GISEL-NEXT: v_subb_u32_e32 v5, vcc, v15, v0, vcc
2238 ; GISEL-NEXT: v_ffbh_u32_e32 v20, v34
2239 ; GISEL-NEXT: v_ffbh_u32_e32 v21, v35
2240 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v13
2241 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v12
2242 ; GISEL-NEXT: v_or_b32_e32 v0, v35, v4
2243 ; GISEL-NEXT: v_or_b32_e32 v1, v34, v5
2244 ; GISEL-NEXT: v_or_b32_e32 v14, v12, v6
2245 ; GISEL-NEXT: v_or_b32_e32 v15, v13, v7
2246 ; GISEL-NEXT: v_add_i32_e32 v21, vcc, 32, v21
2247 ; GISEL-NEXT: v_ffbh_u32_e32 v24, v5
2248 ; GISEL-NEXT: v_ffbh_u32_e32 v25, v4
2249 ; GISEL-NEXT: v_add_i32_e32 v23, vcc, 32, v23
2250 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v7
2251 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v6
2252 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2253 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[14:15]
2254 ; GISEL-NEXT: v_min_u32_e32 v0, v20, v21
2255 ; GISEL-NEXT: v_add_i32_e64 v1, s[6:7], 32, v25
2256 ; GISEL-NEXT: v_min_u32_e32 v14, v22, v23
2257 ; GISEL-NEXT: v_add_i32_e64 v15, s[6:7], 32, v27
2258 ; GISEL-NEXT: v_add_i32_e64 v0, s[6:7], 64, v0
2259 ; GISEL-NEXT: v_min_u32_e32 v1, v24, v1
2260 ; GISEL-NEXT: v_add_i32_e64 v14, s[6:7], 64, v14
2261 ; GISEL-NEXT: v_min_u32_e32 v15, v26, v15
2262 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2263 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, s[4:5]
2264 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[4:5]
2265 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
2266 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
2267 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v15, v14, vcc
2268 ; GISEL-NEXT: v_sub_i32_e32 v14, vcc, v0, v1
2269 ; GISEL-NEXT: v_subb_u32_e64 v15, s[4:5], 0, 0, vcc
2270 ; GISEL-NEXT: v_subb_u32_e64 v0, s[4:5], 0, 0, s[4:5]
2271 ; GISEL-NEXT: v_subb_u32_e64 v1, s[4:5], 0, 0, s[4:5]
2272 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[14:15], v[2:3]
2273 ; GISEL-NEXT: v_cndmask_b32_e64 v21, 0, 1, vcc
2274 ; GISEL-NEXT: v_xor_b32_e32 v2, 0x7f, v14
2275 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[0:1]
2276 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, vcc
2277 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v0
2278 ; GISEL-NEXT: v_or_b32_e32 v3, v15, v1
2279 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2280 ; GISEL-NEXT: v_cndmask_b32_e32 v21, v22, v21, vcc
2281 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
2282 ; GISEL-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
2283 ; GISEL-NEXT: v_or_b32_e32 v3, v20, v21
2284 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v3
2285 ; GISEL-NEXT: v_or_b32_e32 v2, v3, v2
2286 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v20
2287 ; GISEL-NEXT: v_cndmask_b32_e64 v20, v12, 0, vcc
2288 ; GISEL-NEXT: v_and_b32_e32 v22, 1, v2
2289 ; GISEL-NEXT: v_cndmask_b32_e64 v21, v13, 0, vcc
2290 ; GISEL-NEXT: v_cndmask_b32_e64 v2, v6, 0, vcc
2291 ; GISEL-NEXT: v_cndmask_b32_e64 v3, v7, 0, vcc
2292 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v22
2293 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
2294 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
2295 ; GISEL-NEXT: s_cbranch_execz .LBB2_12
2296 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
2297 ; GISEL-NEXT: v_add_i32_e32 v36, vcc, 1, v14
2298 ; GISEL-NEXT: v_addc_u32_e64 v37, s[4:5], 0, v15, vcc
2299 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 0x7f, v14
2300 ; GISEL-NEXT: v_not_b32_e32 v2, 63
2301 ; GISEL-NEXT: v_addc_u32_e64 v38, vcc, 0, v0, s[4:5]
2302 ; GISEL-NEXT: v_addc_u32_e32 v39, vcc, 0, v1, vcc
2303 ; GISEL-NEXT: v_add_i32_e64 v20, s[4:5], v24, v2
2304 ; GISEL-NEXT: v_sub_i32_e64 v14, s[4:5], 64, v24
2305 ; GISEL-NEXT: v_lshl_b64 v[0:1], v[12:13], v24
2306 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[6:7], v24
2307 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
2308 ; GISEL-NEXT: v_lshr_b64 v[14:15], v[12:13], v14
2309 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[12:13], v20
2310 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v24
2311 ; GISEL-NEXT: v_cndmask_b32_e32 v20, 0, v0, vcc
2312 ; GISEL-NEXT: v_cndmask_b32_e32 v21, 0, v1, vcc
2313 ; GISEL-NEXT: v_or_b32_e32 v0, v14, v2
2314 ; GISEL-NEXT: v_or_b32_e32 v1, v15, v3
2315 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v22, v0, vcc
2316 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v23, v1, vcc
2317 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24
2318 ; GISEL-NEXT: v_cndmask_b32_e32 v14, v0, v6, vcc
2319 ; GISEL-NEXT: v_cndmask_b32_e32 v15, v1, v7, vcc
2320 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
2321 ; GISEL-NEXT: v_mov_b32_e32 v0, s8
2322 ; GISEL-NEXT: v_mov_b32_e32 v1, s9
2323 ; GISEL-NEXT: v_mov_b32_e32 v2, s10
2324 ; GISEL-NEXT: v_mov_b32_e32 v3, s11
2325 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
2326 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
2327 ; GISEL-NEXT: s_cbranch_execz .LBB2_11
2328 ; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
2329 ; GISEL-NEXT: v_add_i32_e32 v24, vcc, 0xffffffc0, v36
2330 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, 64, v36
2331 ; GISEL-NEXT: v_lshr_b64 v[0:1], v[6:7], v36
2332 ; GISEL-NEXT: v_lshr_b64 v[2:3], v[12:13], v36
2333 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
2334 ; GISEL-NEXT: v_add_i32_e32 v48, vcc, -1, v35
2335 ; GISEL-NEXT: v_addc_u32_e32 v49, vcc, -1, v34, vcc
2336 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[6:7], v22
2337 ; GISEL-NEXT: v_lshr_b64 v[24:25], v[6:7], v24
2338 ; GISEL-NEXT: v_addc_u32_e32 v50, vcc, -1, v4, vcc
2339 ; GISEL-NEXT: v_addc_u32_e32 v51, vcc, -1, v5, vcc
2340 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
2341 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v22
2342 ; GISEL-NEXT: v_or_b32_e32 v3, v3, v23
2343 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v36
2344 ; GISEL-NEXT: v_cndmask_b32_e32 v2, v24, v2, vcc
2345 ; GISEL-NEXT: v_cndmask_b32_e32 v3, v25, v3, vcc
2346 ; GISEL-NEXT: v_cndmask_b32_e32 v26, 0, v0, vcc
2347 ; GISEL-NEXT: v_cndmask_b32_e32 v27, 0, v1, vcc
2348 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v36
2349 ; GISEL-NEXT: v_cndmask_b32_e32 v24, v2, v12, vcc
2350 ; GISEL-NEXT: v_cndmask_b32_e32 v25, v3, v13, vcc
2351 ; GISEL-NEXT: v_mov_b32_e32 v23, 0
2352 ; GISEL-NEXT: v_mov_b32_e32 v0, s4
2353 ; GISEL-NEXT: v_mov_b32_e32 v1, s5
2354 ; GISEL-NEXT: v_mov_b32_e32 v2, s6
2355 ; GISEL-NEXT: v_mov_b32_e32 v3, s7
2356 ; GISEL-NEXT: .LBB2_9: ; %udiv-do-while
2357 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
2358 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[20:21], 1
2359 ; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v21
2360 ; GISEL-NEXT: v_lshl_b64 v[52:53], v[24:25], 1
2361 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
2362 ; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v25
2363 ; GISEL-NEXT: v_lshrrev_b32_e32 v25, 31, v15
2364 ; GISEL-NEXT: v_lshl_b64 v[14:15], v[14:15], 1
2365 ; GISEL-NEXT: v_add_i32_e32 v36, vcc, -1, v36
2366 ; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc
2367 ; GISEL-NEXT: v_or_b32_e32 v20, v0, v2
2368 ; GISEL-NEXT: v_or_b32_e32 v21, v1, v3
2369 ; GISEL-NEXT: v_or_b32_e32 v2, v26, v24
2370 ; GISEL-NEXT: v_or_b32_e32 v3, v52, v25
2371 ; GISEL-NEXT: v_or_b32_e32 v14, v14, v22
2372 ; GISEL-NEXT: v_addc_u32_e32 v38, vcc, -1, v38, vcc
2373 ; GISEL-NEXT: v_addc_u32_e32 v39, vcc, -1, v39, vcc
2374 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v48, v3
2375 ; GISEL-NEXT: v_subb_u32_e32 v0, vcc, v49, v53, vcc
2376 ; GISEL-NEXT: v_or_b32_e32 v0, v36, v38
2377 ; GISEL-NEXT: v_or_b32_e32 v1, v37, v39
2378 ; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v50, v2, vcc
2379 ; GISEL-NEXT: v_subb_u32_e32 v22, vcc, v51, v27, vcc
2380 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[0:1]
2381 ; GISEL-NEXT: v_ashrrev_i32_e32 v0, 31, v22
2382 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2383 ; GISEL-NEXT: v_and_b32_e32 v22, 1, v0
2384 ; GISEL-NEXT: v_and_b32_e32 v1, v0, v35
2385 ; GISEL-NEXT: v_and_b32_e32 v25, v0, v34
2386 ; GISEL-NEXT: v_and_b32_e32 v26, v0, v4
2387 ; GISEL-NEXT: v_and_b32_e32 v52, v0, v5
2388 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, v3, v1
2389 ; GISEL-NEXT: v_subb_u32_e32 v25, vcc, v53, v25, vcc
2390 ; GISEL-NEXT: v_mov_b32_e32 v0, v22
2391 ; GISEL-NEXT: v_mov_b32_e32 v1, v23
2392 ; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v2, v26, vcc
2393 ; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v52, vcc
2394 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
2395 ; GISEL-NEXT: s_cbranch_execnz .LBB2_9
2396 ; GISEL-NEXT: ; %bb.10: ; %Flow
2397 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
2398 ; GISEL-NEXT: .LBB2_11: ; %Flow11
2399 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
2400 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[20:21], 1
2401 ; GISEL-NEXT: v_lshl_b64 v[2:3], v[14:15], 1
2402 ; GISEL-NEXT: v_lshrrev_b32_e32 v14, 31, v21
2403 ; GISEL-NEXT: v_or_b32_e32 v2, v2, v14
2404 ; GISEL-NEXT: v_or_b32_e32 v20, v0, v22
2405 ; GISEL-NEXT: v_or_b32_e32 v21, v1, v23
2406 ; GISEL-NEXT: .LBB2_12: ; %Flow12
2407 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
2408 ; GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v30, v31, 0
2409 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v30, v18, 0
2410 ; GISEL-NEXT: v_mul_lo_u32 v24, v30, v19
2411 ; GISEL-NEXT: v_mul_lo_u32 v25, v29, v18
2412 ; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v35, v20, 0
2413 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v35, v2, 0
2414 ; GISEL-NEXT: v_mul_lo_u32 v26, v35, v3
2415 ; GISEL-NEXT: v_mul_lo_u32 v27, v34, v2
2416 ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v29, v32, v[14:15]
2417 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v34, v21, v[22:23]
2418 ; GISEL-NEXT: v_mov_b32_e32 v22, v19
2419 ; GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v31, v[2:3]
2420 ; GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v4, v20, v[14:15]
2421 ; GISEL-NEXT: v_mad_u64_u32 v[1:2], vcc, v30, v32, v[1:2]
2422 ; GISEL-NEXT: v_mov_b32_e32 v23, v14
2423 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v35, v21, v[22:23]
2424 ; GISEL-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v29, v31, v[1:2]
2425 ; GISEL-NEXT: v_addc_u32_e64 v3, s[6:7], v3, v24, s[6:7]
2426 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[6:7], v34, v20, v[22:23]
2427 ; GISEL-NEXT: v_addc_u32_e64 v14, s[6:7], v15, v26, s[6:7]
2428 ; GISEL-NEXT: v_addc_u32_e32 v3, vcc, v3, v25, vcc
2429 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v16, v0
2430 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v17, v1, vcc
2431 ; GISEL-NEXT: v_xor_b32_e32 v15, v0, v28
2432 ; GISEL-NEXT: v_addc_u32_e64 v0, s[4:5], v14, v27, s[4:5]
2433 ; GISEL-NEXT: v_sub_i32_e64 v12, s[4:5], v12, v18
2434 ; GISEL-NEXT: v_subb_u32_e64 v14, s[4:5], v13, v22, s[4:5]
2435 ; GISEL-NEXT: v_xor_b32_e32 v16, v12, v33
2436 ; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v10, v32, v[3:4]
2437 ; GISEL-NEXT: v_xor_b32_e32 v1, v1, v28
2438 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v4, v21, v[0:1]
2439 ; GISEL-NEXT: v_xor_b32_e32 v14, v14, v33
2440 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v11, v31, v[12:13]
2441 ; GISEL-NEXT: v_sub_i32_e64 v0, s[6:7], v15, v28
2442 ; GISEL-NEXT: v_subb_u32_e64 v1, s[6:7], v1, v28, s[6:7]
2443 ; GISEL-NEXT: v_mad_u64_u32 v[3:4], s[8:9], v5, v20, v[3:4]
2444 ; GISEL-NEXT: v_sub_i32_e64 v4, s[8:9], v16, v33
2445 ; GISEL-NEXT: v_subb_u32_e64 v5, s[8:9], v14, v33, s[8:9]
2446 ; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v8, v2, vcc
2447 ; GISEL-NEXT: v_subb_u32_e32 v8, vcc, v9, v10, vcc
2448 ; GISEL-NEXT: v_xor_b32_e32 v2, v2, v28
2449 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v23, s[4:5]
2450 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v3, vcc
2451 ; GISEL-NEXT: v_xor_b32_e32 v6, v6, v33
2452 ; GISEL-NEXT: v_xor_b32_e32 v7, v8, v28
2453 ; GISEL-NEXT: v_xor_b32_e32 v8, v3, v33
2454 ; GISEL-NEXT: v_subb_u32_e64 v2, vcc, v2, v28, s[6:7]
2455 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v7, v28, vcc
2456 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v33, s[8:9]
2457 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v8, v33, vcc
2458 ; GISEL-NEXT: s_setpc_b64 s[30:31]
2459 %shl = srem <2 x i128> %lhs, %rhs
2463 define <2 x i128> @v_urem_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
2464 ; SDAG-LABEL: v_urem_v2i128_vv:
2465 ; SDAG: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2466 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2467 ; SDAG-NEXT: v_or_b32_e32 v17, v9, v11
2468 ; SDAG-NEXT: v_or_b32_e32 v16, v8, v10
2469 ; SDAG-NEXT: v_or_b32_e32 v19, v1, v3
2470 ; SDAG-NEXT: v_or_b32_e32 v18, v0, v2
2471 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v10
2472 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v11
2473 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v8
2474 ; SDAG-NEXT: v_ffbh_u32_e32 v23, v9
2475 ; SDAG-NEXT: v_ffbh_u32_e32 v24, v2
2476 ; SDAG-NEXT: v_ffbh_u32_e32 v25, v3
2477 ; SDAG-NEXT: v_ffbh_u32_e32 v26, v0
2478 ; SDAG-NEXT: v_ffbh_u32_e32 v27, v1
2479 ; SDAG-NEXT: v_mov_b32_e32 v28, 0
2480 ; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
2481 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2482 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2483 ; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20
2484 ; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22
2485 ; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24
2486 ; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26
2487 ; SDAG-NEXT: v_min_u32_e32 v16, v16, v21
2488 ; SDAG-NEXT: v_min_u32_e32 v17, v17, v23
2489 ; SDAG-NEXT: v_min_u32_e32 v18, v18, v25
2490 ; SDAG-NEXT: v_min_u32_e32 v19, v19, v27
2491 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
2492 ; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17
2493 ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
2494 ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19
2495 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
2496 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[10:11]
2497 ; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc
2498 ; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
2499 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3]
2500 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc
2501 ; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
2502 ; SDAG-NEXT: v_sub_i32_e32 v18, vcc, v16, v18
2503 ; SDAG-NEXT: v_subb_u32_e32 v19, vcc, v20, v17, vcc
2504 ; SDAG-NEXT: v_xor_b32_e32 v16, 0x7f, v18
2505 ; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2506 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[18:19]
2507 ; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2508 ; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2509 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
2510 ; SDAG-NEXT: v_or_b32_e32 v17, v19, v21
2511 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2512 ; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
2513 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[16:17]
2514 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[20:21]
2515 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v23, v22, s[4:5]
2516 ; SDAG-NEXT: v_and_b32_e32 v16, 1, v16
2517 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v16
2518 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
2519 ; SDAG-NEXT: v_cndmask_b32_e64 v33, v3, 0, s[4:5]
2520 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
2521 ; SDAG-NEXT: v_cndmask_b32_e64 v31, v2, 0, s[4:5]
2522 ; SDAG-NEXT: v_cndmask_b32_e64 v30, v1, 0, s[4:5]
2523 ; SDAG-NEXT: s_and_b64 s[8:9], s[6:7], vcc
2524 ; SDAG-NEXT: v_cndmask_b32_e64 v32, v0, 0, s[4:5]
2525 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[8:9]
2526 ; SDAG-NEXT: s_cbranch_execz .LBB3_6
2527 ; SDAG-NEXT: ; %bb.1: ; %udiv-bb15
2528 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, 1, v18
2529 ; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v18
2530 ; SDAG-NEXT: v_mov_b32_e32 v16, 0
2531 ; SDAG-NEXT: v_mov_b32_e32 v17, 0
2532 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, 0, v19, vcc
2533 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[0:1], v22
2534 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, 0, v20, vcc
2535 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, 0, v21, vcc
2536 ; SDAG-NEXT: v_or_b32_e32 v19, v30, v32
2537 ; SDAG-NEXT: v_sub_i32_e32 v21, vcc, 0x7f, v18
2538 ; SDAG-NEXT: v_or_b32_e32 v20, v31, v33
2539 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[2:3], v21
2540 ; SDAG-NEXT: v_sub_i32_e32 v18, vcc, 64, v21
2541 ; SDAG-NEXT: v_lshl_b64 v[26:27], v[0:1], v21
2542 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[19:20]
2543 ; SDAG-NEXT: v_lshr_b64 v[18:19], v[0:1], v18
2544 ; SDAG-NEXT: v_or_b32_e32 v19, v25, v19
2545 ; SDAG-NEXT: v_or_b32_e32 v18, v24, v18
2546 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v21
2547 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v23, v19, s[4:5]
2548 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v22, v18, s[4:5]
2549 ; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, v27, s[4:5]
2550 ; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, v26, s[4:5]
2551 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v21
2552 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v19, v3, s[4:5]
2553 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v18, v2, s[4:5]
2554 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
2555 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
2556 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
2557 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
2558 ; SDAG-NEXT: s_cbranch_execz .LBB3_5
2559 ; SDAG-NEXT: ; %bb.2: ; %udiv-preheader4
2560 ; SDAG-NEXT: v_lshr_b64 v[16:17], v[0:1], v30
2561 ; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v30
2562 ; SDAG-NEXT: v_subrev_i32_e32 v35, vcc, 64, v30
2563 ; SDAG-NEXT: v_lshr_b64 v[26:27], v[2:3], v30
2564 ; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v8
2565 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
2566 ; SDAG-NEXT: v_mov_b32_e32 v24, 0
2567 ; SDAG-NEXT: v_mov_b32_e32 v25, 0
2568 ; SDAG-NEXT: v_mov_b32_e32 v20, 0
2569 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
2570 ; SDAG-NEXT: v_lshl_b64 v[28:29], v[2:3], v28
2571 ; SDAG-NEXT: v_lshr_b64 v[37:38], v[2:3], v35
2572 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v9, vcc
2573 ; SDAG-NEXT: v_or_b32_e32 v17, v17, v29
2574 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v28
2575 ; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v10, vcc
2576 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v30
2577 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v38, v17, s[4:5]
2578 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v37, v16, s[4:5]
2579 ; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5]
2580 ; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5]
2581 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v11, vcc
2582 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
2583 ; SDAG-NEXT: v_cndmask_b32_e32 v27, v17, v1, vcc
2584 ; SDAG-NEXT: v_cndmask_b32_e32 v26, v16, v0, vcc
2585 ; SDAG-NEXT: v_mov_b32_e32 v17, 0
2586 ; SDAG-NEXT: .LBB3_3: ; %udiv-do-while3
2587 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
2588 ; SDAG-NEXT: v_lshrrev_b32_e32 v16, 31, v23
2589 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
2590 ; SDAG-NEXT: v_lshl_b64 v[28:29], v[28:29], 1
2591 ; SDAG-NEXT: v_lshrrev_b32_e32 v38, 31, v27
2592 ; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
2593 ; SDAG-NEXT: v_lshrrev_b32_e32 v39, 31, v19
2594 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
2595 ; SDAG-NEXT: v_or_b32_e32 v23, v25, v23
2596 ; SDAG-NEXT: v_or_b32_e32 v22, v24, v22
2597 ; SDAG-NEXT: v_or_b32_e32 v24, v28, v38
2598 ; SDAG-NEXT: v_or_b32_e32 v25, v26, v39
2599 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v16
2600 ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v34, v25
2601 ; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v35, v27, vcc
2602 ; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v36, v24, vcc
2603 ; SDAG-NEXT: v_subb_u32_e32 v16, vcc, v37, v29, vcc
2604 ; SDAG-NEXT: v_ashrrev_i32_e32 v16, 31, v16
2605 ; SDAG-NEXT: v_and_b32_e32 v26, v16, v8
2606 ; SDAG-NEXT: v_and_b32_e32 v28, v16, v9
2607 ; SDAG-NEXT: v_and_b32_e32 v38, v16, v10
2608 ; SDAG-NEXT: v_and_b32_e32 v39, v16, v11
2609 ; SDAG-NEXT: v_and_b32_e32 v16, 1, v16
2610 ; SDAG-NEXT: v_sub_i32_e32 v26, vcc, v25, v26
2611 ; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v28, vcc
2612 ; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v24, v38, vcc
2613 ; SDAG-NEXT: v_subb_u32_e32 v29, vcc, v29, v39, vcc
2614 ; SDAG-NEXT: v_add_i32_e32 v30, vcc, -1, v30
2615 ; SDAG-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
2616 ; SDAG-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
2617 ; SDAG-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
2618 ; SDAG-NEXT: v_or_b32_e32 v24, v30, v32
2619 ; SDAG-NEXT: v_or_b32_e32 v25, v31, v33
2620 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[24:25]
2621 ; SDAG-NEXT: v_or_b32_e32 v19, v21, v19
2622 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
2623 ; SDAG-NEXT: v_or_b32_e32 v18, v20, v18
2624 ; SDAG-NEXT: v_mov_b32_e32 v25, v17
2625 ; SDAG-NEXT: v_mov_b32_e32 v24, v16
2626 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
2627 ; SDAG-NEXT: s_cbranch_execnz .LBB3_3
2628 ; SDAG-NEXT: ; %bb.4: ; %Flow13
2629 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
2630 ; SDAG-NEXT: .LBB3_5: ; %Flow14
2631 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
2632 ; SDAG-NEXT: v_lshl_b64 v[18:19], v[18:19], 1
2633 ; SDAG-NEXT: v_lshrrev_b32_e32 v24, 31, v23
2634 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
2635 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v24
2636 ; SDAG-NEXT: v_or_b32_e32 v33, v21, v19
2637 ; SDAG-NEXT: v_or_b32_e32 v30, v17, v23
2638 ; SDAG-NEXT: v_or_b32_e32 v31, v20, v18
2639 ; SDAG-NEXT: v_or_b32_e32 v32, v16, v22
2640 ; SDAG-NEXT: .LBB3_6: ; %Flow16
2641 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
2642 ; SDAG-NEXT: v_or_b32_e32 v17, v13, v15
2643 ; SDAG-NEXT: v_or_b32_e32 v16, v12, v14
2644 ; SDAG-NEXT: v_or_b32_e32 v19, v5, v7
2645 ; SDAG-NEXT: v_or_b32_e32 v18, v4, v6
2646 ; SDAG-NEXT: v_ffbh_u32_e32 v20, v14
2647 ; SDAG-NEXT: v_ffbh_u32_e32 v21, v15
2648 ; SDAG-NEXT: v_ffbh_u32_e32 v22, v12
2649 ; SDAG-NEXT: v_ffbh_u32_e32 v23, v13
2650 ; SDAG-NEXT: v_ffbh_u32_e32 v24, v6
2651 ; SDAG-NEXT: v_ffbh_u32_e32 v25, v7
2652 ; SDAG-NEXT: v_ffbh_u32_e32 v26, v4
2653 ; SDAG-NEXT: v_ffbh_u32_e32 v27, v5
2654 ; SDAG-NEXT: v_mov_b32_e32 v28, 0
2655 ; SDAG-NEXT: s_mov_b64 s[8:9], 0x7f
2656 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2657 ; SDAG-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2658 ; SDAG-NEXT: v_add_i32_e64 v16, s[6:7], 32, v20
2659 ; SDAG-NEXT: v_add_i32_e64 v17, s[6:7], 32, v22
2660 ; SDAG-NEXT: v_add_i32_e64 v18, s[6:7], 32, v24
2661 ; SDAG-NEXT: v_add_i32_e64 v19, s[6:7], 32, v26
2662 ; SDAG-NEXT: s_or_b64 s[6:7], vcc, s[4:5]
2663 ; SDAG-NEXT: v_min_u32_e32 v16, v16, v21
2664 ; SDAG-NEXT: v_min_u32_e32 v17, v17, v23
2665 ; SDAG-NEXT: v_min_u32_e32 v18, v18, v25
2666 ; SDAG-NEXT: v_min_u32_e32 v19, v19, v27
2667 ; SDAG-NEXT: v_add_i32_e32 v17, vcc, 64, v17
2668 ; SDAG-NEXT: v_addc_u32_e64 v20, s[4:5], 0, 0, vcc
2669 ; SDAG-NEXT: v_add_i32_e32 v19, vcc, 64, v19
2670 ; SDAG-NEXT: v_addc_u32_e64 v21, s[4:5], 0, 0, vcc
2671 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[14:15]
2672 ; SDAG-NEXT: v_cndmask_b32_e64 v20, v20, 0, vcc
2673 ; SDAG-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
2674 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[6:7]
2675 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v21, 0, vcc
2676 ; SDAG-NEXT: v_cndmask_b32_e32 v18, v19, v18, vcc
2677 ; SDAG-NEXT: v_sub_i32_e32 v16, vcc, v16, v18
2678 ; SDAG-NEXT: v_subb_u32_e32 v17, vcc, v20, v17, vcc
2679 ; SDAG-NEXT: v_xor_b32_e32 v18, 0x7f, v16
2680 ; SDAG-NEXT: v_subbrev_u32_e32 v20, vcc, 0, v28, vcc
2681 ; SDAG-NEXT: v_cmp_lt_u64_e64 s[4:5], s[8:9], v[16:17]
2682 ; SDAG-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2683 ; SDAG-NEXT: v_subbrev_u32_e32 v21, vcc, 0, v28, vcc
2684 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v20
2685 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2686 ; SDAG-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
2687 ; SDAG-NEXT: v_or_b32_e32 v19, v17, v21
2688 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
2689 ; SDAG-NEXT: v_cndmask_b32_e32 v22, v23, v22, vcc
2690 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[18:19]
2691 ; SDAG-NEXT: v_and_b32_e32 v18, 1, v22
2692 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v18
2693 ; SDAG-NEXT: s_or_b64 s[4:5], s[6:7], s[4:5]
2694 ; SDAG-NEXT: v_cndmask_b32_e64 v23, v7, 0, s[4:5]
2695 ; SDAG-NEXT: s_xor_b64 s[6:7], s[4:5], -1
2696 ; SDAG-NEXT: v_cndmask_b32_e64 v22, v6, 0, s[4:5]
2697 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v5, 0, s[4:5]
2698 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v4, 0, s[4:5]
2699 ; SDAG-NEXT: s_and_b64 s[4:5], s[6:7], vcc
2700 ; SDAG-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
2701 ; SDAG-NEXT: s_cbranch_execz .LBB3_12
2702 ; SDAG-NEXT: ; %bb.7: ; %udiv-bb1
2703 ; SDAG-NEXT: v_add_i32_e32 v34, vcc, 1, v16
2704 ; SDAG-NEXT: v_sub_i32_e64 v22, s[4:5], 63, v16
2705 ; SDAG-NEXT: v_mov_b32_e32 v18, 0
2706 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
2707 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, 0, v17, vcc
2708 ; SDAG-NEXT: v_lshl_b64 v[22:23], v[4:5], v22
2709 ; SDAG-NEXT: v_addc_u32_e32 v36, vcc, 0, v20, vcc
2710 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, 0, v21, vcc
2711 ; SDAG-NEXT: v_or_b32_e32 v20, v34, v36
2712 ; SDAG-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v16
2713 ; SDAG-NEXT: v_or_b32_e32 v21, v35, v37
2714 ; SDAG-NEXT: v_lshl_b64 v[16:17], v[6:7], v26
2715 ; SDAG-NEXT: v_sub_i32_e32 v27, vcc, 64, v26
2716 ; SDAG-NEXT: v_lshl_b64 v[24:25], v[4:5], v26
2717 ; SDAG-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[20:21]
2718 ; SDAG-NEXT: v_lshr_b64 v[20:21], v[4:5], v27
2719 ; SDAG-NEXT: v_or_b32_e32 v17, v17, v21
2720 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v20
2721 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v26
2722 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v23, v17, s[4:5]
2723 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v22, v16, s[4:5]
2724 ; SDAG-NEXT: v_cndmask_b32_e64 v21, 0, v25, s[4:5]
2725 ; SDAG-NEXT: v_cndmask_b32_e64 v20, 0, v24, s[4:5]
2726 ; SDAG-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v26
2727 ; SDAG-NEXT: v_cndmask_b32_e64 v17, v17, v7, s[4:5]
2728 ; SDAG-NEXT: v_cndmask_b32_e64 v16, v16, v6, s[4:5]
2729 ; SDAG-NEXT: v_mov_b32_e32 v22, 0
2730 ; SDAG-NEXT: v_mov_b32_e32 v23, 0
2731 ; SDAG-NEXT: s_and_saveexec_b64 s[4:5], vcc
2732 ; SDAG-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
2733 ; SDAG-NEXT: s_cbranch_execz .LBB3_11
2734 ; SDAG-NEXT: ; %bb.8: ; %udiv-preheader
2735 ; SDAG-NEXT: v_lshr_b64 v[18:19], v[4:5], v34
2736 ; SDAG-NEXT: v_sub_i32_e32 v28, vcc, 64, v34
2737 ; SDAG-NEXT: v_subrev_i32_e32 v39, vcc, 64, v34
2738 ; SDAG-NEXT: v_lshr_b64 v[26:27], v[6:7], v34
2739 ; SDAG-NEXT: v_add_i32_e32 v38, vcc, -1, v12
2740 ; SDAG-NEXT: s_mov_b64 s[10:11], 0
2741 ; SDAG-NEXT: v_mov_b32_e32 v24, 0
2742 ; SDAG-NEXT: v_mov_b32_e32 v25, 0
2743 ; SDAG-NEXT: v_mov_b32_e32 v22, 0
2744 ; SDAG-NEXT: v_mov_b32_e32 v23, 0
2745 ; SDAG-NEXT: v_lshl_b64 v[28:29], v[6:7], v28
2746 ; SDAG-NEXT: v_lshr_b64 v[49:50], v[6:7], v39
2747 ; SDAG-NEXT: v_addc_u32_e32 v39, vcc, -1, v13, vcc
2748 ; SDAG-NEXT: v_or_b32_e32 v19, v19, v29
2749 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v28
2750 ; SDAG-NEXT: v_addc_u32_e32 v48, vcc, -1, v14, vcc
2751 ; SDAG-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v34
2752 ; SDAG-NEXT: v_cndmask_b32_e64 v19, v50, v19, s[4:5]
2753 ; SDAG-NEXT: v_cndmask_b32_e64 v18, v49, v18, s[4:5]
2754 ; SDAG-NEXT: v_cndmask_b32_e64 v29, 0, v27, s[4:5]
2755 ; SDAG-NEXT: v_cndmask_b32_e64 v28, 0, v26, s[4:5]
2756 ; SDAG-NEXT: v_addc_u32_e32 v49, vcc, -1, v15, vcc
2757 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v34
2758 ; SDAG-NEXT: v_cndmask_b32_e32 v27, v19, v5, vcc
2759 ; SDAG-NEXT: v_cndmask_b32_e32 v26, v18, v4, vcc
2760 ; SDAG-NEXT: v_mov_b32_e32 v19, 0
2761 ; SDAG-NEXT: .LBB3_9: ; %udiv-do-while
2762 ; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
2763 ; SDAG-NEXT: v_lshl_b64 v[28:29], v[28:29], 1
2764 ; SDAG-NEXT: v_lshrrev_b32_e32 v18, 31, v27
2765 ; SDAG-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
2766 ; SDAG-NEXT: v_lshrrev_b32_e32 v50, 31, v17
2767 ; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
2768 ; SDAG-NEXT: v_lshrrev_b32_e32 v51, 31, v21
2769 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
2770 ; SDAG-NEXT: v_or_b32_e32 v18, v28, v18
2771 ; SDAG-NEXT: v_or_b32_e32 v26, v26, v50
2772 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v51
2773 ; SDAG-NEXT: v_or_b32_e32 v17, v23, v17
2774 ; SDAG-NEXT: v_or_b32_e32 v21, v25, v21
2775 ; SDAG-NEXT: v_sub_i32_e32 v25, vcc, v38, v26
2776 ; SDAG-NEXT: v_or_b32_e32 v16, v22, v16
2777 ; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v39, v27, vcc
2778 ; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v48, v18, vcc
2779 ; SDAG-NEXT: v_subb_u32_e32 v25, vcc, v49, v29, vcc
2780 ; SDAG-NEXT: v_ashrrev_i32_e32 v25, 31, v25
2781 ; SDAG-NEXT: v_and_b32_e32 v28, v25, v12
2782 ; SDAG-NEXT: v_and_b32_e32 v50, v25, v13
2783 ; SDAG-NEXT: v_and_b32_e32 v51, v25, v14
2784 ; SDAG-NEXT: v_and_b32_e32 v52, v25, v15
2785 ; SDAG-NEXT: v_sub_i32_e32 v26, vcc, v26, v28
2786 ; SDAG-NEXT: v_subb_u32_e32 v27, vcc, v27, v50, vcc
2787 ; SDAG-NEXT: v_subb_u32_e32 v28, vcc, v18, v51, vcc
2788 ; SDAG-NEXT: v_subb_u32_e32 v29, vcc, v29, v52, vcc
2789 ; SDAG-NEXT: v_add_i32_e32 v34, vcc, -1, v34
2790 ; SDAG-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc
2791 ; SDAG-NEXT: v_addc_u32_e32 v36, vcc, -1, v36, vcc
2792 ; SDAG-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc
2793 ; SDAG-NEXT: v_or_b32_e32 v50, v34, v36
2794 ; SDAG-NEXT: v_or_b32_e32 v51, v35, v37
2795 ; SDAG-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[50:51]
2796 ; SDAG-NEXT: v_and_b32_e32 v18, 1, v25
2797 ; SDAG-NEXT: s_or_b64 s[10:11], vcc, s[10:11]
2798 ; SDAG-NEXT: v_or_b32_e32 v20, v24, v20
2799 ; SDAG-NEXT: v_mov_b32_e32 v25, v19
2800 ; SDAG-NEXT: v_mov_b32_e32 v24, v18
2801 ; SDAG-NEXT: s_andn2_b64 exec, exec, s[10:11]
2802 ; SDAG-NEXT: s_cbranch_execnz .LBB3_9
2803 ; SDAG-NEXT: ; %bb.10: ; %Flow
2804 ; SDAG-NEXT: s_or_b64 exec, exec, s[10:11]
2805 ; SDAG-NEXT: .LBB3_11: ; %Flow11
2806 ; SDAG-NEXT: s_or_b64 exec, exec, s[8:9]
2807 ; SDAG-NEXT: v_lshl_b64 v[16:17], v[16:17], 1
2808 ; SDAG-NEXT: v_lshrrev_b32_e32 v24, 31, v21
2809 ; SDAG-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
2810 ; SDAG-NEXT: v_or_b32_e32 v16, v16, v24
2811 ; SDAG-NEXT: v_or_b32_e32 v23, v23, v17
2812 ; SDAG-NEXT: v_or_b32_e32 v19, v19, v21
2813 ; SDAG-NEXT: v_or_b32_e32 v22, v22, v16
2814 ; SDAG-NEXT: v_or_b32_e32 v18, v18, v20
2815 ; SDAG-NEXT: .LBB3_12: ; %Flow12
2816 ; SDAG-NEXT: s_or_b64 exec, exec, s[6:7]
2817 ; SDAG-NEXT: v_mul_lo_u32 v20, v32, v11
2818 ; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v32, v10, 0
2819 ; SDAG-NEXT: v_mul_lo_u32 v28, v30, v10
2820 ; SDAG-NEXT: v_mul_lo_u32 v29, v33, v8
2821 ; SDAG-NEXT: v_mul_lo_u32 v33, v31, v9
2822 ; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v32, 0
2823 ; SDAG-NEXT: v_mov_b32_e32 v21, 0
2824 ; SDAG-NEXT: v_mul_lo_u32 v34, v18, v15
2825 ; SDAG-NEXT: v_mad_u64_u32 v[24:25], s[4:5], v18, v14, 0
2826 ; SDAG-NEXT: v_mul_lo_u32 v35, v19, v14
2827 ; SDAG-NEXT: v_mul_lo_u32 v36, v23, v12
2828 ; SDAG-NEXT: v_mul_lo_u32 v37, v22, v13
2829 ; SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v12, v18, 0
2830 ; SDAG-NEXT: v_add_i32_e32 v17, vcc, v17, v20
2831 ; SDAG-NEXT: v_mov_b32_e32 v20, v11
2832 ; SDAG-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v9, v32, v[20:21]
2833 ; SDAG-NEXT: v_sub_i32_e32 v0, vcc, v0, v10
2834 ; SDAG-NEXT: v_add_i32_e64 v23, s[4:5], v25, v34
2835 ; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v17, v28
2836 ; SDAG-NEXT: v_mov_b32_e32 v20, v26
2837 ; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v8, v30, v[20:21]
2838 ; SDAG-NEXT: v_add_i32_e64 v25, s[4:5], v23, v35
2839 ; SDAG-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v31, v8, v[16:17]
2840 ; SDAG-NEXT: v_add_i32_e64 v26, s[4:5], v27, v11
2841 ; SDAG-NEXT: v_addc_u32_e64 v27, s[4:5], 0, 0, s[4:5]
2842 ; SDAG-NEXT: v_subb_u32_e32 v1, vcc, v1, v10, vcc
2843 ; SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v22, v12, v[24:25]
2844 ; SDAG-NEXT: v_mov_b32_e32 v20, v15
2845 ; SDAG-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v13, v18, v[20:21]
2846 ; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v29, v17
2847 ; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v9, v30, v[26:27]
2848 ; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v36, v11
2849 ; SDAG-NEXT: v_mov_b32_e32 v20, v22
2850 ; SDAG-NEXT: v_mad_u64_u32 v[11:12], s[4:5], v12, v19, v[20:21]
2851 ; SDAG-NEXT: v_add_i32_e64 v15, s[4:5], v33, v15
2852 ; SDAG-NEXT: v_add_i32_e64 v20, s[4:5], v37, v17
2853 ; SDAG-NEXT: v_add_i32_e64 v17, s[4:5], v23, v12
2854 ; SDAG-NEXT: v_addc_u32_e64 v18, s[4:5], 0, 0, s[4:5]
2855 ; SDAG-NEXT: v_add_i32_e64 v8, s[4:5], v8, v16
2856 ; SDAG-NEXT: v_addc_u32_e64 v12, s[4:5], v9, v15, s[4:5]
2857 ; SDAG-NEXT: v_subb_u32_e32 v2, vcc, v2, v8, vcc
2858 ; SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v19, v[17:18]
2859 ; SDAG-NEXT: v_subb_u32_e32 v3, vcc, v3, v12, vcc
2860 ; SDAG-NEXT: v_add_i32_e32 v8, vcc, v8, v10
2861 ; SDAG-NEXT: v_addc_u32_e32 v9, vcc, v9, v20, vcc
2862 ; SDAG-NEXT: v_sub_i32_e32 v4, vcc, v4, v14
2863 ; SDAG-NEXT: v_subb_u32_e32 v5, vcc, v5, v11, vcc
2864 ; SDAG-NEXT: v_subb_u32_e32 v6, vcc, v6, v8, vcc
2865 ; SDAG-NEXT: v_subb_u32_e32 v7, vcc, v7, v9, vcc
2866 ; SDAG-NEXT: s_setpc_b64 s[30:31]
2868 ; GISEL-LABEL: v_urem_v2i128_vv:
2869 ; GISEL: ; %bb.0: ; %_udiv-special-cases_udiv-special-cases
2870 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2871 ; GISEL-NEXT: v_or_b32_e32 v16, v8, v10
2872 ; GISEL-NEXT: v_or_b32_e32 v17, v9, v11
2873 ; GISEL-NEXT: v_or_b32_e32 v18, v0, v2
2874 ; GISEL-NEXT: v_or_b32_e32 v19, v1, v3
2875 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v9
2876 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v8
2877 ; GISEL-NEXT: v_ffbh_u32_e32 v24, v11
2878 ; GISEL-NEXT: v_ffbh_u32_e32 v25, v10
2879 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v1
2880 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v0
2881 ; GISEL-NEXT: v_ffbh_u32_e32 v28, v2
2882 ; GISEL-NEXT: v_ffbh_u32_e32 v29, v3
2883 ; GISEL-NEXT: v_mov_b32_e32 v20, 0x7f
2884 ; GISEL-NEXT: v_mov_b32_e32 v21, 0
2885 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
2886 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2887 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
2888 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], 32, v23
2889 ; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], 32, v25
2890 ; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v27
2891 ; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v28
2892 ; GISEL-NEXT: v_min_u32_e32 v16, v22, v16
2893 ; GISEL-NEXT: v_min_u32_e32 v17, v24, v17
2894 ; GISEL-NEXT: v_min_u32_e32 v18, v26, v18
2895 ; GISEL-NEXT: v_min_u32_e32 v19, v29, v19
2896 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
2897 ; GISEL-NEXT: v_cndmask_b32_e64 v22, 0, 1, s[4:5]
2898 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, 64, v16
2899 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18
2900 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11]
2901 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
2902 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[2:3]
2903 ; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
2904 ; GISEL-NEXT: v_sub_i32_e32 v18, vcc, v16, v17
2905 ; GISEL-NEXT: v_subb_u32_e64 v19, s[4:5], 0, 0, vcc
2906 ; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
2907 ; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
2908 ; GISEL-NEXT: v_xor_b32_e32 v23, 0x7f, v18
2909 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[18:19], v[20:21]
2910 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
2911 ; GISEL-NEXT: v_or_b32_e32 v20, v23, v16
2912 ; GISEL-NEXT: v_or_b32_e32 v21, v19, v17
2913 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
2914 ; GISEL-NEXT: v_cndmask_b32_e64 v23, 0, 1, vcc
2915 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
2916 ; GISEL-NEXT: v_cndmask_b32_e32 v23, v23, v24, vcc
2917 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[20:21]
2918 ; GISEL-NEXT: v_cndmask_b32_e64 v20, 0, 1, vcc
2919 ; GISEL-NEXT: v_or_b32_e32 v21, v22, v23
2920 ; GISEL-NEXT: v_or_b32_e32 v20, v21, v20
2921 ; GISEL-NEXT: v_and_b32_e32 v21, 1, v21
2922 ; GISEL-NEXT: v_and_b32_e32 v20, 1, v20
2923 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v21
2924 ; GISEL-NEXT: v_cndmask_b32_e64 v32, v0, 0, vcc
2925 ; GISEL-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v20
2926 ; GISEL-NEXT: v_cndmask_b32_e64 v20, v2, 0, vcc
2927 ; GISEL-NEXT: v_cndmask_b32_e64 v21, v3, 0, vcc
2928 ; GISEL-NEXT: s_xor_b64 s[4:5], s[4:5], -1
2929 ; GISEL-NEXT: v_cndmask_b32_e64 v33, v1, 0, vcc
2930 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
2931 ; GISEL-NEXT: s_cbranch_execz .LBB3_6
2932 ; GISEL-NEXT: ; %bb.1: ; %udiv-bb15
2933 ; GISEL-NEXT: v_add_i32_e32 v30, vcc, 1, v18
2934 ; GISEL-NEXT: v_addc_u32_e64 v31, s[4:5], 0, v19, vcc
2935 ; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 0x7f, v18
2936 ; GISEL-NEXT: v_not_b32_e32 v18, 63
2937 ; GISEL-NEXT: v_addc_u32_e64 v32, vcc, 0, v16, s[4:5]
2938 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, 0, v17, vcc
2939 ; GISEL-NEXT: v_add_i32_e64 v22, s[4:5], v26, v18
2940 ; GISEL-NEXT: v_sub_i32_e64 v20, s[4:5], 64, v26
2941 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[0:1], v26
2942 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[2:3], v26
2943 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
2944 ; GISEL-NEXT: v_lshr_b64 v[20:21], v[0:1], v20
2945 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[0:1], v22
2946 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v26
2947 ; GISEL-NEXT: v_cndmask_b32_e32 v22, 0, v16, vcc
2948 ; GISEL-NEXT: v_cndmask_b32_e32 v23, 0, v17, vcc
2949 ; GISEL-NEXT: v_or_b32_e32 v16, v20, v18
2950 ; GISEL-NEXT: v_or_b32_e32 v17, v21, v19
2951 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v24, v16, vcc
2952 ; GISEL-NEXT: v_cndmask_b32_e32 v17, v25, v17, vcc
2953 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v26
2954 ; GISEL-NEXT: v_cndmask_b32_e32 v20, v16, v2, vcc
2955 ; GISEL-NEXT: v_cndmask_b32_e32 v21, v17, v3, vcc
2956 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
2957 ; GISEL-NEXT: v_mov_b32_e32 v19, s11
2958 ; GISEL-NEXT: v_mov_b32_e32 v18, s10
2959 ; GISEL-NEXT: v_mov_b32_e32 v17, s9
2960 ; GISEL-NEXT: v_mov_b32_e32 v16, s8
2961 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
2962 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
2963 ; GISEL-NEXT: s_cbranch_execz .LBB3_5
2964 ; GISEL-NEXT: ; %bb.2: ; %udiv-preheader4
2965 ; GISEL-NEXT: v_add_i32_e32 v26, vcc, 0xffffffc0, v30
2966 ; GISEL-NEXT: v_sub_i32_e32 v24, vcc, 64, v30
2967 ; GISEL-NEXT: v_lshr_b64 v[16:17], v[2:3], v30
2968 ; GISEL-NEXT: v_lshr_b64 v[18:19], v[0:1], v30
2969 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
2970 ; GISEL-NEXT: v_add_i32_e32 v34, vcc, -1, v8
2971 ; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v9, vcc
2972 ; GISEL-NEXT: v_lshl_b64 v[24:25], v[2:3], v24
2973 ; GISEL-NEXT: v_lshr_b64 v[26:27], v[2:3], v26
2974 ; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v10, vcc
2975 ; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v11, vcc
2976 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
2977 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v24
2978 ; GISEL-NEXT: v_or_b32_e32 v19, v19, v25
2979 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v30
2980 ; GISEL-NEXT: v_cndmask_b32_e32 v18, v26, v18, vcc
2981 ; GISEL-NEXT: v_cndmask_b32_e32 v19, v27, v19, vcc
2982 ; GISEL-NEXT: v_cndmask_b32_e32 v26, 0, v16, vcc
2983 ; GISEL-NEXT: v_cndmask_b32_e32 v27, 0, v17, vcc
2984 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v30
2985 ; GISEL-NEXT: v_cndmask_b32_e32 v28, v18, v0, vcc
2986 ; GISEL-NEXT: v_cndmask_b32_e32 v29, v19, v1, vcc
2987 ; GISEL-NEXT: v_mov_b32_e32 v25, 0
2988 ; GISEL-NEXT: v_mov_b32_e32 v19, s7
2989 ; GISEL-NEXT: v_mov_b32_e32 v18, s6
2990 ; GISEL-NEXT: v_mov_b32_e32 v17, s5
2991 ; GISEL-NEXT: v_mov_b32_e32 v16, s4
2992 ; GISEL-NEXT: .LBB3_3: ; %udiv-do-while3
2993 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
2994 ; GISEL-NEXT: v_lshrrev_b32_e32 v38, 31, v23
2995 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1
2996 ; GISEL-NEXT: v_lshrrev_b32_e32 v24, 31, v29
2997 ; GISEL-NEXT: v_lshl_b64 v[28:29], v[28:29], 1
2998 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[26:27], 1
2999 ; GISEL-NEXT: v_lshrrev_b32_e32 v39, 31, v21
3000 ; GISEL-NEXT: v_add_i32_e32 v30, vcc, -1, v30
3001 ; GISEL-NEXT: v_addc_u32_e32 v31, vcc, -1, v31, vcc
3002 ; GISEL-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
3003 ; GISEL-NEXT: v_or_b32_e32 v22, v16, v18
3004 ; GISEL-NEXT: v_or_b32_e32 v23, v17, v19
3005 ; GISEL-NEXT: v_or_b32_e32 v18, v26, v24
3006 ; GISEL-NEXT: v_or_b32_e32 v19, v28, v39
3007 ; GISEL-NEXT: v_addc_u32_e32 v32, vcc, -1, v32, vcc
3008 ; GISEL-NEXT: v_addc_u32_e32 v33, vcc, -1, v33, vcc
3009 ; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v34, v19
3010 ; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v35, v29, vcc
3011 ; GISEL-NEXT: v_or_b32_e32 v16, v30, v32
3012 ; GISEL-NEXT: v_or_b32_e32 v17, v31, v33
3013 ; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v36, v18, vcc
3014 ; GISEL-NEXT: v_subb_u32_e32 v24, vcc, v37, v27, vcc
3015 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3016 ; GISEL-NEXT: v_ashrrev_i32_e32 v16, 31, v24
3017 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
3018 ; GISEL-NEXT: v_and_b32_e32 v17, v16, v8
3019 ; GISEL-NEXT: v_and_b32_e32 v26, v16, v9
3020 ; GISEL-NEXT: v_and_b32_e32 v39, v16, v10
3021 ; GISEL-NEXT: v_and_b32_e32 v48, v16, v11
3022 ; GISEL-NEXT: v_and_b32_e32 v24, 1, v16
3023 ; GISEL-NEXT: v_sub_i32_e32 v28, vcc, v19, v17
3024 ; GISEL-NEXT: v_subb_u32_e32 v29, vcc, v29, v26, vcc
3025 ; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v18, v39, vcc
3026 ; GISEL-NEXT: v_subb_u32_e32 v27, vcc, v27, v48, vcc
3027 ; GISEL-NEXT: v_or_b32_e32 v20, v20, v38
3028 ; GISEL-NEXT: v_mov_b32_e32 v16, v24
3029 ; GISEL-NEXT: v_mov_b32_e32 v17, v25
3030 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
3031 ; GISEL-NEXT: s_cbranch_execnz .LBB3_3
3032 ; GISEL-NEXT: ; %bb.4: ; %Flow13
3033 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
3034 ; GISEL-NEXT: .LBB3_5: ; %Flow14
3035 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
3036 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1
3037 ; GISEL-NEXT: v_lshl_b64 v[20:21], v[20:21], 1
3038 ; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v23
3039 ; GISEL-NEXT: v_or_b32_e32 v20, v20, v22
3040 ; GISEL-NEXT: v_or_b32_e32 v32, v16, v18
3041 ; GISEL-NEXT: v_or_b32_e32 v33, v17, v19
3042 ; GISEL-NEXT: .LBB3_6: ; %Flow16
3043 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
3044 ; GISEL-NEXT: s_mov_b64 s[8:9], 0
3045 ; GISEL-NEXT: v_or_b32_e32 v16, v12, v14
3046 ; GISEL-NEXT: v_or_b32_e32 v17, v13, v15
3047 ; GISEL-NEXT: v_or_b32_e32 v18, v4, v6
3048 ; GISEL-NEXT: v_or_b32_e32 v19, v5, v7
3049 ; GISEL-NEXT: v_ffbh_u32_e32 v22, v13
3050 ; GISEL-NEXT: v_ffbh_u32_e32 v23, v12
3051 ; GISEL-NEXT: v_ffbh_u32_e32 v26, v15
3052 ; GISEL-NEXT: v_ffbh_u32_e32 v27, v14
3053 ; GISEL-NEXT: v_ffbh_u32_e32 v28, v5
3054 ; GISEL-NEXT: v_ffbh_u32_e32 v29, v4
3055 ; GISEL-NEXT: v_ffbh_u32_e32 v30, v7
3056 ; GISEL-NEXT: v_ffbh_u32_e32 v31, v6
3057 ; GISEL-NEXT: v_mov_b32_e32 v24, 0x7f
3058 ; GISEL-NEXT: v_mov_b32_e32 v25, 0
3059 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3060 ; GISEL-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[18:19]
3061 ; GISEL-NEXT: v_add_i32_e64 v16, s[6:7], 32, v23
3062 ; GISEL-NEXT: v_add_i32_e64 v17, s[6:7], 32, v27
3063 ; GISEL-NEXT: v_add_i32_e64 v18, s[6:7], 32, v29
3064 ; GISEL-NEXT: v_add_i32_e64 v19, s[6:7], 32, v31
3065 ; GISEL-NEXT: v_min_u32_e32 v16, v22, v16
3066 ; GISEL-NEXT: v_min_u32_e32 v17, v26, v17
3067 ; GISEL-NEXT: v_min_u32_e32 v18, v28, v18
3068 ; GISEL-NEXT: v_min_u32_e32 v19, v30, v19
3069 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
3070 ; GISEL-NEXT: v_cndmask_b32_e64 v26, 0, 1, s[4:5]
3071 ; GISEL-NEXT: v_add_i32_e32 v16, vcc, 64, v16
3072 ; GISEL-NEXT: v_add_i32_e32 v18, vcc, 64, v18
3073 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[14:15]
3074 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v17, v16, vcc
3075 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[6:7]
3076 ; GISEL-NEXT: v_cndmask_b32_e32 v17, v19, v18, vcc
3077 ; GISEL-NEXT: v_sub_i32_e32 v22, vcc, v16, v17
3078 ; GISEL-NEXT: v_subb_u32_e64 v23, s[4:5], 0, 0, vcc
3079 ; GISEL-NEXT: v_subb_u32_e64 v16, s[4:5], 0, 0, s[4:5]
3080 ; GISEL-NEXT: v_subb_u32_e64 v17, s[4:5], 0, 0, s[4:5]
3081 ; GISEL-NEXT: v_cmp_gt_u64_e32 vcc, v[22:23], v[24:25]
3082 ; GISEL-NEXT: v_cndmask_b32_e64 v24, 0, 1, vcc
3083 ; GISEL-NEXT: v_xor_b32_e32 v18, 0x7f, v22
3084 ; GISEL-NEXT: v_cmp_lt_u64_e32 vcc, 0, v[16:17]
3085 ; GISEL-NEXT: v_cndmask_b32_e64 v25, 0, 1, vcc
3086 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v16
3087 ; GISEL-NEXT: v_or_b32_e32 v19, v23, v17
3088 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3089 ; GISEL-NEXT: v_cndmask_b32_e32 v24, v25, v24, vcc
3090 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[18:19]
3091 ; GISEL-NEXT: v_cndmask_b32_e64 v18, 0, 1, vcc
3092 ; GISEL-NEXT: v_or_b32_e32 v19, v26, v24
3093 ; GISEL-NEXT: v_and_b32_e32 v24, 1, v19
3094 ; GISEL-NEXT: v_or_b32_e32 v18, v19, v18
3095 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v24
3096 ; GISEL-NEXT: v_cndmask_b32_e64 v24, v4, 0, vcc
3097 ; GISEL-NEXT: v_and_b32_e32 v26, 1, v18
3098 ; GISEL-NEXT: v_cndmask_b32_e64 v25, v5, 0, vcc
3099 ; GISEL-NEXT: v_cndmask_b32_e64 v18, v6, 0, vcc
3100 ; GISEL-NEXT: v_cndmask_b32_e64 v19, v7, 0, vcc
3101 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v26
3102 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
3103 ; GISEL-NEXT: s_and_saveexec_b64 s[12:13], s[4:5]
3104 ; GISEL-NEXT: s_cbranch_execz .LBB3_12
3105 ; GISEL-NEXT: ; %bb.7: ; %udiv-bb1
3106 ; GISEL-NEXT: v_add_i32_e32 v34, vcc, 1, v22
3107 ; GISEL-NEXT: v_addc_u32_e64 v35, s[4:5], 0, v23, vcc
3108 ; GISEL-NEXT: v_sub_i32_e32 v28, vcc, 0x7f, v22
3109 ; GISEL-NEXT: v_not_b32_e32 v18, 63
3110 ; GISEL-NEXT: v_addc_u32_e64 v36, vcc, 0, v16, s[4:5]
3111 ; GISEL-NEXT: v_addc_u32_e32 v37, vcc, 0, v17, vcc
3112 ; GISEL-NEXT: v_add_i32_e64 v24, s[4:5], v28, v18
3113 ; GISEL-NEXT: v_sub_i32_e64 v22, s[4:5], 64, v28
3114 ; GISEL-NEXT: v_lshl_b64 v[16:17], v[4:5], v28
3115 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[6:7], v28
3116 ; GISEL-NEXT: s_xor_b64 s[4:5], vcc, -1
3117 ; GISEL-NEXT: v_lshr_b64 v[22:23], v[4:5], v22
3118 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[4:5], v24
3119 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v28
3120 ; GISEL-NEXT: v_cndmask_b32_e32 v24, 0, v16, vcc
3121 ; GISEL-NEXT: v_cndmask_b32_e32 v25, 0, v17, vcc
3122 ; GISEL-NEXT: v_or_b32_e32 v16, v22, v18
3123 ; GISEL-NEXT: v_or_b32_e32 v17, v23, v19
3124 ; GISEL-NEXT: v_cndmask_b32_e32 v16, v26, v16, vcc
3125 ; GISEL-NEXT: v_cndmask_b32_e32 v17, v27, v17, vcc
3126 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v28
3127 ; GISEL-NEXT: v_cndmask_b32_e32 v22, v16, v6, vcc
3128 ; GISEL-NEXT: v_cndmask_b32_e32 v23, v17, v7, vcc
3129 ; GISEL-NEXT: s_mov_b64 s[10:11], s[8:9]
3130 ; GISEL-NEXT: v_mov_b32_e32 v19, s11
3131 ; GISEL-NEXT: v_mov_b32_e32 v18, s10
3132 ; GISEL-NEXT: v_mov_b32_e32 v17, s9
3133 ; GISEL-NEXT: v_mov_b32_e32 v16, s8
3134 ; GISEL-NEXT: s_and_saveexec_b64 s[6:7], s[4:5]
3135 ; GISEL-NEXT: s_xor_b64 s[8:9], exec, s[6:7]
3136 ; GISEL-NEXT: s_cbranch_execz .LBB3_11
3137 ; GISEL-NEXT: ; %bb.8: ; %udiv-preheader
3138 ; GISEL-NEXT: v_add_i32_e32 v28, vcc, 0xffffffc0, v34
3139 ; GISEL-NEXT: v_sub_i32_e32 v26, vcc, 64, v34
3140 ; GISEL-NEXT: v_lshr_b64 v[16:17], v[6:7], v34
3141 ; GISEL-NEXT: v_lshr_b64 v[18:19], v[4:5], v34
3142 ; GISEL-NEXT: s_mov_b64 s[4:5], 0
3143 ; GISEL-NEXT: v_add_i32_e32 v38, vcc, -1, v12
3144 ; GISEL-NEXT: v_addc_u32_e32 v39, vcc, -1, v13, vcc
3145 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[6:7], v26
3146 ; GISEL-NEXT: v_lshr_b64 v[28:29], v[6:7], v28
3147 ; GISEL-NEXT: v_addc_u32_e32 v48, vcc, -1, v14, vcc
3148 ; GISEL-NEXT: v_addc_u32_e32 v49, vcc, -1, v15, vcc
3149 ; GISEL-NEXT: s_mov_b64 s[6:7], s[4:5]
3150 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v26
3151 ; GISEL-NEXT: v_or_b32_e32 v19, v19, v27
3152 ; GISEL-NEXT: v_cmp_gt_u32_e32 vcc, 64, v34
3153 ; GISEL-NEXT: v_cndmask_b32_e32 v18, v28, v18, vcc
3154 ; GISEL-NEXT: v_cndmask_b32_e32 v19, v29, v19, vcc
3155 ; GISEL-NEXT: v_cndmask_b32_e32 v30, 0, v16, vcc
3156 ; GISEL-NEXT: v_cndmask_b32_e32 v31, 0, v17, vcc
3157 ; GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v34
3158 ; GISEL-NEXT: v_cndmask_b32_e32 v28, v18, v4, vcc
3159 ; GISEL-NEXT: v_cndmask_b32_e32 v29, v19, v5, vcc
3160 ; GISEL-NEXT: v_mov_b32_e32 v27, 0
3161 ; GISEL-NEXT: v_mov_b32_e32 v19, s7
3162 ; GISEL-NEXT: v_mov_b32_e32 v18, s6
3163 ; GISEL-NEXT: v_mov_b32_e32 v17, s5
3164 ; GISEL-NEXT: v_mov_b32_e32 v16, s4
3165 ; GISEL-NEXT: .LBB3_9: ; %udiv-do-while
3166 ; GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
3167 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[24:25], 1
3168 ; GISEL-NEXT: v_lshrrev_b32_e32 v26, 31, v25
3169 ; GISEL-NEXT: v_lshl_b64 v[50:51], v[28:29], 1
3170 ; GISEL-NEXT: v_lshl_b64 v[30:31], v[30:31], 1
3171 ; GISEL-NEXT: v_lshrrev_b32_e32 v28, 31, v29
3172 ; GISEL-NEXT: v_lshrrev_b32_e32 v29, 31, v23
3173 ; GISEL-NEXT: v_lshl_b64 v[22:23], v[22:23], 1
3174 ; GISEL-NEXT: v_add_i32_e32 v34, vcc, -1, v34
3175 ; GISEL-NEXT: v_addc_u32_e32 v35, vcc, -1, v35, vcc
3176 ; GISEL-NEXT: v_or_b32_e32 v24, v16, v18
3177 ; GISEL-NEXT: v_or_b32_e32 v25, v17, v19
3178 ; GISEL-NEXT: v_or_b32_e32 v18, v30, v28
3179 ; GISEL-NEXT: v_or_b32_e32 v19, v50, v29
3180 ; GISEL-NEXT: v_or_b32_e32 v22, v22, v26
3181 ; GISEL-NEXT: v_addc_u32_e32 v36, vcc, -1, v36, vcc
3182 ; GISEL-NEXT: v_addc_u32_e32 v37, vcc, -1, v37, vcc
3183 ; GISEL-NEXT: v_sub_i32_e32 v16, vcc, v38, v19
3184 ; GISEL-NEXT: v_subb_u32_e32 v16, vcc, v39, v51, vcc
3185 ; GISEL-NEXT: v_or_b32_e32 v16, v34, v36
3186 ; GISEL-NEXT: v_or_b32_e32 v17, v35, v37
3187 ; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v48, v18, vcc
3188 ; GISEL-NEXT: v_subb_u32_e32 v26, vcc, v49, v31, vcc
3189 ; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[16:17]
3190 ; GISEL-NEXT: v_ashrrev_i32_e32 v16, 31, v26
3191 ; GISEL-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
3192 ; GISEL-NEXT: v_and_b32_e32 v26, 1, v16
3193 ; GISEL-NEXT: v_and_b32_e32 v17, v16, v12
3194 ; GISEL-NEXT: v_and_b32_e32 v29, v16, v13
3195 ; GISEL-NEXT: v_and_b32_e32 v30, v16, v14
3196 ; GISEL-NEXT: v_and_b32_e32 v50, v16, v15
3197 ; GISEL-NEXT: v_sub_i32_e32 v28, vcc, v19, v17
3198 ; GISEL-NEXT: v_subb_u32_e32 v29, vcc, v51, v29, vcc
3199 ; GISEL-NEXT: v_mov_b32_e32 v16, v26
3200 ; GISEL-NEXT: v_mov_b32_e32 v17, v27
3201 ; GISEL-NEXT: v_subb_u32_e32 v30, vcc, v18, v30, vcc
3202 ; GISEL-NEXT: v_subb_u32_e32 v31, vcc, v31, v50, vcc
3203 ; GISEL-NEXT: s_andn2_b64 exec, exec, s[4:5]
3204 ; GISEL-NEXT: s_cbranch_execnz .LBB3_9
3205 ; GISEL-NEXT: ; %bb.10: ; %Flow
3206 ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5]
3207 ; GISEL-NEXT: .LBB3_11: ; %Flow11
3208 ; GISEL-NEXT: s_or_b64 exec, exec, s[8:9]
3209 ; GISEL-NEXT: v_lshl_b64 v[26:27], v[24:25], 1
3210 ; GISEL-NEXT: v_lshl_b64 v[18:19], v[22:23], 1
3211 ; GISEL-NEXT: v_lshrrev_b32_e32 v22, 31, v25
3212 ; GISEL-NEXT: v_or_b32_e32 v18, v18, v22
3213 ; GISEL-NEXT: v_or_b32_e32 v24, v16, v26
3214 ; GISEL-NEXT: v_or_b32_e32 v25, v17, v27
3215 ; GISEL-NEXT: .LBB3_12: ; %Flow12
3216 ; GISEL-NEXT: s_or_b64 exec, exec, s[12:13]
3217 ; GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v8, v32, 0
3218 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v8, v20, 0
3219 ; GISEL-NEXT: v_mul_lo_u32 v28, v8, v21
3220 ; GISEL-NEXT: v_mul_lo_u32 v29, v9, v20
3221 ; GISEL-NEXT: v_mad_u64_u32 v[20:21], s[4:5], v12, v24, 0
3222 ; GISEL-NEXT: v_mad_u64_u32 v[26:27], s[4:5], v12, v18, 0
3223 ; GISEL-NEXT: v_mul_lo_u32 v30, v12, v19
3224 ; GISEL-NEXT: v_mul_lo_u32 v31, v13, v18
3225 ; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v9, v33, v[22:23]
3226 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v13, v25, v[26:27]
3227 ; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[4:5], v10, v32, v[18:19]
3228 ; GISEL-NEXT: v_mad_u64_u32 v[22:23], s[4:5], v14, v24, v[22:23]
3229 ; GISEL-NEXT: v_mad_u64_u32 v[17:18], vcc, v8, v33, v[17:18]
3230 ; GISEL-NEXT: v_mad_u64_u32 v[21:22], s[4:5], v12, v25, v[21:22]
3231 ; GISEL-NEXT: v_mad_u64_u32 v[8:9], s[6:7], v9, v32, v[17:18]
3232 ; GISEL-NEXT: v_addc_u32_e64 v17, s[6:7], v19, v28, s[6:7]
3233 ; GISEL-NEXT: v_mad_u64_u32 v[12:13], s[6:7], v13, v24, v[21:22]
3234 ; GISEL-NEXT: v_addc_u32_e64 v18, s[6:7], v23, v30, s[6:7]
3235 ; GISEL-NEXT: v_addc_u32_e32 v17, vcc, v17, v29, vcc
3236 ; GISEL-NEXT: v_sub_i32_e32 v0, vcc, v0, v16
3237 ; GISEL-NEXT: v_subb_u32_e32 v1, vcc, v1, v8, vcc
3238 ; GISEL-NEXT: v_addc_u32_e64 v8, s[4:5], v18, v31, s[4:5]
3239 ; GISEL-NEXT: v_sub_i32_e64 v4, s[4:5], v4, v20
3240 ; GISEL-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v12, s[4:5]
3241 ; GISEL-NEXT: v_mad_u64_u32 v[16:17], s[6:7], v10, v33, v[17:18]
3242 ; GISEL-NEXT: v_mad_u64_u32 v[18:19], s[6:7], v14, v25, v[8:9]
3243 ; GISEL-NEXT: v_mad_u64_u32 v[10:11], s[6:7], v11, v32, v[16:17]
3244 ; GISEL-NEXT: v_mad_u64_u32 v[11:12], s[6:7], v15, v24, v[18:19]
3245 ; GISEL-NEXT: v_subb_u32_e32 v2, vcc, v2, v9, vcc
3246 ; GISEL-NEXT: v_subb_u32_e32 v3, vcc, v3, v10, vcc
3247 ; GISEL-NEXT: v_subb_u32_e64 v6, vcc, v6, v13, s[4:5]
3248 ; GISEL-NEXT: v_subb_u32_e32 v7, vcc, v7, v11, vcc
3249 ; GISEL-NEXT: s_setpc_b64 s[30:31]
3250 %shl = urem <2 x i128> %lhs, %rhs