1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-SDAG,GFX6-SDAG %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx600 < %s | FileCheck -check-prefixes=GFX67,GFX6,GFX67-GISEL,GFX6-GISEL %s
5 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-SDAG,GFX7-SDAG %s
6 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx700 < %s | FileCheck -check-prefixes=GFX67,GFX7,GFX67-GISEL,GFX7-GISEL %s
8 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
9 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
11 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
12 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
14 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-SDAG,GFX90A-SDAG %s
15 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-GISEL,GFX90A-GISEL %s
17 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
18 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
20 ; Test for integer mad formation for patterns used in clpeak
; Scalar i32 form of the clpeak multiply-add chain. The check lines inside the
; function are autogenerated (see the NOTE at the top of this file), so
; regenerate them with the update script instead of editing them by hand.
; In the expectations below, the SelectionDAG runs for gfx900, gfx90a and
; gfx1030 end with two v_mad_u64_u32 instructions; every other configuration
; uses separate v_mul_lo_u32 and add instructions.
22 define i32 @clpeak_imad_pat_i32(i32 %x, i32 %y) {
23 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i32:
24 ; GFX67-SDAG: ; %bb.0: ; %entry
25 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
26 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
27 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
28 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
29 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
30 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
31 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0
32 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
33 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
34 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
36 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i32:
37 ; GFX67-GISEL: ; %bb.0: ; %entry
38 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
40 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
41 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
42 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
43 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
44 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
45 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
46 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
47 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
49 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i32:
50 ; GFX8-SDAG: ; %bb.0: ; %entry
51 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
53 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
54 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0
55 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
56 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
57 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0
58 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
59 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
60 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
62 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i32:
63 ; GFX8-GISEL: ; %bb.0: ; %entry
64 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
65 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
66 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
67 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0
68 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
69 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
70 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
71 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
72 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
73 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
75 ; GFX900-SDAG-LABEL: clpeak_imad_pat_i32:
76 ; GFX900-SDAG: ; %bb.0: ; %entry
77 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
79 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
80 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
81 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
82 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
83 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
84 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
86 ; GFX900-GISEL-LABEL: clpeak_imad_pat_i32:
87 ; GFX900-GISEL: ; %bb.0: ; %entry
88 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
89 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
90 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
91 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
92 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
93 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
94 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
95 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
96 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
97 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
99 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32:
100 ; GFX90A-SDAG: ; %bb.0: ; %entry
101 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
103 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
104 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
105 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
106 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
107 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
108 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
110 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32:
111 ; GFX90A-GISEL: ; %bb.0: ; %entry
112 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
114 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
115 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
116 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
117 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
118 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
119 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
120 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
121 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
123 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i32:
124 ; GFX10-SDAG: ; %bb.0: ; %entry
125 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
126 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
127 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
128 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
129 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
130 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
131 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
132 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
134 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i32:
135 ; GFX10-GISEL: ; %bb.0: ; %entry
136 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
137 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
138 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
139 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
140 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
141 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
142 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
143 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
144 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
145 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; The SSA names below do not describe their opcodes (%add is a mul, %mul119 is
; an add, and so on); read the opcode, not the name.
;   %add    = %y18 * %y
;   %mul119 = %add + %y18
;   %add2   = %mul119 * %y
;   %add220 = %add + 1          %add422 = %add2 + 1
;   %add6   = (%add2 * %add220) * %add422
148 %add = mul i32 %y18, %y
149 %mul119 = add i32 %add, %y18
150 %add2 = mul i32 %mul119, %y
151 %add220 = add i32 %add, 1
152 %add422 = add i32 %add2, 1
153 %mul521 = mul i32 %add2, %add220
154 %add6 = mul i32 %mul521, %add422
; Scalar i16 form of the clpeak multiply-add chain; both arguments and the
; return value are marked signext. Every autogenerated expectation below ends
; with "v_bfe_i32 v0, v0, 0, 16" right before s_setpc_b64 -- presumably the
; sign extension required by the signext return (NOTE(review): confirm).
; The check lines are autogenerated; regenerate them rather than hand-editing.
158 define signext i16 @clpeak_imad_pat_i16(i16 signext %x, i16 signext %y) {
159 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i16:
160 ; GFX67-SDAG: ; %bb.0: ; %entry
161 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
163 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
164 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
165 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
166 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
167 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
168 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
169 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
170 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
171 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
172 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
173 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
174 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
175 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
176 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
177 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
178 ; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
179 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
181 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i16:
182 ; GFX67-GISEL: ; %bb.0: ; %entry
183 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
185 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
186 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
187 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
188 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
189 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
190 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
191 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
192 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
193 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
194 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
195 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
196 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
197 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
198 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
199 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
200 ; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
201 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
203 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16:
204 ; GFX8-SDAG: ; %bb.0: ; %entry
205 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
207 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
208 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
209 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
210 ; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
211 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
213 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i16:
214 ; GFX8-GISEL: ; %bb.0: ; %entry
215 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
216 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
217 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
218 ; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
219 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
220 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
221 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
222 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1
223 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
224 ; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
225 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
227 ; GFX9-SDAG-LABEL: clpeak_imad_pat_i16:
228 ; GFX9-SDAG: ; %bb.0: ; %entry
229 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
230 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
231 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
232 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
233 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
234 ; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
235 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
237 ; GFX9-GISEL-LABEL: clpeak_imad_pat_i16:
238 ; GFX9-GISEL: ; %bb.0: ; %entry
239 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
240 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
241 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
242 ; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
243 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
244 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
245 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
246 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1
247 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
248 ; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
249 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
251 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i16:
252 ; GFX10-SDAG: ; %bb.0: ; %entry
253 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
254 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
255 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
256 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
257 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
258 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
259 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
261 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i16:
262 ; GFX10-GISEL: ; %bb.0: ; %entry
263 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
264 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
265 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
266 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
267 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
268 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
269 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
270 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
271 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
272 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
273 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Computation, with the "+ 1" temporaries (%conv*) folded in; note that the
; values named %add* are produced by mul instructions:
;   %add   = (%x + 1) * %y
;   %add8  = (%y + 1) * %add
;   %add14 = (%add + 1) * %add8
;   %add20 = %add14 * (%add8 + 1)
275 %conv33 = add i16 %x, 1
276 %add = mul i16 %conv33, %y
277 %conv434 = add i16 %y, 1
278 %add8 = mul i16 %conv434, %add
279 %conv1035 = add i16 %add, 1
280 %add14 = mul i16 %conv1035, %add8
281 %conv1636 = add i16 %add8, 1
282 %add20 = mul i16 %add14, %conv1636
; <2 x i16> form of the clpeak multiply-add chain; the "+ 1" steps use the
; splat constant <i16 1, i16 1>. In the autogenerated expectations below, the
; gfx900/gfx90a and gfx1030 runs use packed v_pk_* instructions (SelectionDAG
; emits v_pk_sub_u16 with -1 where GlobalISel emits v_pk_add_u16 with 1), while
; the gfx600/gfx700/gfx803 runs handle the two 16-bit lanes with separate
; instructions. Regenerate the check lines rather than hand-editing them.
286 define <2 x i16> @clpeak_imad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
287 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16:
288 ; GFX67-SDAG: ; %bb.0: ; %entry
289 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
290 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
291 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
292 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
293 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
294 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
295 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
296 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
297 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
298 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
299 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
300 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2
301 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v4, v2, 1
302 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3
303 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
304 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v5, v3, 1
305 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
306 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6
307 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
308 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5
309 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4
310 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7
311 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
312 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
313 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
314 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
315 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
316 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
317 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1
318 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
320 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16:
321 ; GFX67-GISEL: ; %bb.0: ; %entry
322 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
324 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
325 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
326 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
327 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
328 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
329 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
330 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
331 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
332 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
333 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
334 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
335 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
336 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
337 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
338 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
339 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
340 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
341 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
342 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
343 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
344 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
345 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
346 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
347 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
348 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
349 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
350 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
351 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
352 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
353 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
354 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
355 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
356 ; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
357 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
358 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
359 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
360 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
361 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
362 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
363 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
364 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
365 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
366 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
367 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
368 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
369 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
371 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16:
372 ; GFX8-SDAG: ; %bb.0: ; %entry
373 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1
375 ; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0
376 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
377 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
378 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v0, v3
379 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, v0
380 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v2, v1
381 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v2, v1, v2
382 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v3
383 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v2, v1
384 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0
385 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1
386 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
387 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
388 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
389 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
390 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
392 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16:
393 ; GFX8-GISEL: ; %bb.0: ; %entry
394 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
395 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1
396 ; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0
397 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
398 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
399 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2
400 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0
401 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v1
402 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
403 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v2, v1, 1
404 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
405 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1
406 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
407 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v2
408 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0
409 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v2, v1
410 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
411 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
412 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
414 ; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16:
415 ; GFX9-SDAG: ; %bb.0: ; %entry
416 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
418 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
419 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
420 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
421 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
422 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
423 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
424 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
425 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
427 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16:
428 ; GFX9-GISEL: ; %bb.0: ; %entry
429 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
431 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
432 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
433 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
434 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
435 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
436 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
437 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
438 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
440 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16:
441 ; GFX10-SDAG: ; %bb.0: ; %entry
442 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
444 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
445 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
446 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
447 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
448 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
449 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
450 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
451 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
453 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16:
454 ; GFX10-GISEL: ; %bb.0: ; %entry
455 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
457 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
458 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
459 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
460 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
461 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
462 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
463 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
464 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Element-wise computation (the SSA names do not match their opcodes: %add is
; a mul and %mul119 is an add):
;   %y18  = %x + <1, 1>
;   %add  = %y18 * %y
;   %add2 = (%add + %y18) * %y
;   %add6 = (%add2 * (%add + <1, 1>)) * (%add2 + <1, 1>)
466 %y18 = add <2 x i16> %x, <i16 1, i16 1>
467 %add = mul <2 x i16> %y18, %y
468 %mul119 = add <2 x i16> %add, %y18
469 %add2 = mul <2 x i16> %mul119, %y
470 %add220 = add <2 x i16> %add, <i16 1, i16 1>
471 %add422 = add <2 x i16> %add2, <i16 1, i16 1>
472 %mul521 = mul <2 x i16> %add2, %add220
473 %add6 = mul <2 x i16> %mul521, %add422
477 define <3 x i16> @clpeak_imad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
478 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i16:
479 ; GFX67-SDAG: ; %bb.0: ; %entry
480 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
482 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
483 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0
484 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1
485 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
486 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
487 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
488 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4
489 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0
490 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1
491 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1
492 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2
493 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
494 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
495 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
496 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
497 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9
498 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2
499 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3
500 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
501 ; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6
502 ; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
503 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1
504 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
505 ; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6
506 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
507 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
508 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5
509 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1
510 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
511 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6
512 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
513 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
514 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1
515 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8
516 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
517 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
518 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
519 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
520 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6
521 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
522 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
523 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
524 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
525 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
526 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
527 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
528 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v5, v0
529 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
530 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v3, v2
531 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
533 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i16:
534 ; GFX67-GISEL: ; %bb.0: ; %entry
535 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
536 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
537 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
538 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
539 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
540 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
541 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
542 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
543 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
544 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
545 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
546 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
547 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
548 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
549 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
550 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
551 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
552 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
553 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
554 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
555 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
556 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
557 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
558 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
559 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
560 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
561 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
562 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
563 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
564 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
565 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
566 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
567 ; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
568 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
569 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
570 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
571 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
572 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
573 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
574 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
575 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
576 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
577 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
578 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
579 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
580 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
581 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
583 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i16:
584 ; GFX8-SDAG: ; %bb.0: ; %entry
585 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
586 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 1
587 ; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v0
588 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
589 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
590 ; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1
591 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v6, v0, v5
592 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v5, v0
593 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v3
594 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v8, v4, v2
595 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v2, v4
596 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
597 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v5
598 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
599 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v4, v2
600 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v6, v0
601 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v2, v8, v2
602 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v3, v0, v3
603 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1
604 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
605 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v2, v4
606 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5
607 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
608 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
610 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i16:
611 ; GFX8-GISEL: ; %bb.0: ; %entry
612 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
613 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 1
614 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
615 ; GFX8-GISEL-NEXT: v_add_u16_e32 v5, 1, v0
616 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
617 ; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1
618 ; GFX8-GISEL-NEXT: v_mad_u16 v6, v5, v2, v5
619 ; GFX8-GISEL-NEXT: v_mad_u16 v7, v0, v4, v0
620 ; GFX8-GISEL-NEXT: v_mad_u16 v8, v1, v3, v1
621 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v9, v6, v2
622 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v10, v7, v4
623 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v5, v2, 1
624 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1
625 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v11, v8, v3
626 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
627 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v6, v2, 1
628 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v7, v4, 1
629 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v5, v9, v5
630 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v10, v0
631 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v8, v3, 1
632 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v11, v1
633 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v5, v2
634 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
635 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
636 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3
637 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
639 ; GFX9-SDAG-LABEL: clpeak_imad_pat_v3i16:
640 ; GFX9-SDAG: ; %bb.0: ; %entry
641 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
643 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
644 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
645 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
646 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
647 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
648 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
649 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
650 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
651 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
652 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
653 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
654 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
655 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
656 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
657 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
658 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
660 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v3i16:
661 ; GFX9-GISEL: ; %bb.0: ; %entry
662 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
663 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
664 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1
665 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
666 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
667 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
668 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
669 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
670 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
671 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
672 ; GFX9-GISEL-NEXT: v_pk_add_u16 v3, v5, 1
673 ; GFX9-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
674 ; GFX9-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
675 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
676 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
677 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
678 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
679 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
681 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i16:
682 ; GFX10-SDAG: ; %bb.0: ; %entry
683 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
685 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
686 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
687 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
688 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
689 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
690 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
691 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
692 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
693 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
694 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
695 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
696 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
697 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
698 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
699 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
700 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
702 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i16:
703 ; GFX10-GISEL: ; %bb.0: ; %entry
704 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
705 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
706 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1
707 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
708 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
709 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
710 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
711 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
712 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
713 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
714 ; GFX10-GISEL-NEXT: v_pk_add_u16 v3, v5, 1
715 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
716 ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
717 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
718 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
719 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
720 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
721 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
723 %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1>
724 %add = mul <3 x i16> %y48, %y
725 %mul1249 = add <3 x i16> %add, %y48
726 %add15 = mul <3 x i16> %mul1249, %y
727 %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1>
728 %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1>
729 %mul3051 = mul <3 x i16> %add15, %add1550
730 %add33 = mul <3 x i16> %mul3051, %add2452
; <4 x i16> form of the clpeak integer multiply-add chain (the IR is at the
; bottom of this function, after the check lines). Per lane, with
;   t = %x + 1,  a = t * %y,  b = (a + t) * %y
; the last value computed is the product b * (a + 1) * (b + 1).
;
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
; What the expected output below shows:
;   - GFX6/7 targets handle each lane in its own register with 24-bit
;     v_mul_u32_u24 / v_mad_u32_u24 and 0xffff masking.
;   - GFX8 targets use per-lane v_mul_lo_u16 / v_mad_u16, extracting or
;     re-packing the high halves with shifts or SDWA selects.
;   - GFX9 and GFX10 targets use packed v_pk_mul_lo_u16 / v_pk_add_u16; the
;     SelectionDAG output expresses each "+ 1" as v_pk_sub_u16 with -1, while
;     the GlobalISel output uses v_pk_add_u16 with 1.
734 define <4 x i16> @clpeak_imad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
735 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i16:
736 ; GFX67-SDAG: ; %bb.0: ; %entry
737 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
738 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3
739 ; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3
740 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
741 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
742 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3
743 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
744 ; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2
745 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
746 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
747 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
748 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
749 ; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1
750 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
751 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
752 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7
753 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
754 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1
755 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5
756 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2
757 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0
758 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1
759 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
760 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13
761 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v4, 1
762 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
763 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
764 ; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7
765 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
766 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12
767 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
768 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4
769 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
770 ; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
771 ; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
772 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
773 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6
774 ; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
775 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v6, 1
776 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
777 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v1
778 ; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, s4, v7
779 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
780 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3
781 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
782 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8
783 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
784 ; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2
785 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7
786 ; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10
787 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
788 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
789 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5
790 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7
791 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
792 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, s4, v2
793 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
794 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8
795 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5
796 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
797 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
798 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
799 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
800 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
801 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
802 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
803 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
804 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
805 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v7, v0
806 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
807 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
808 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
809 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
811 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i16:
812 ; GFX67-GISEL: ; %bb.0: ; %entry
813 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
814 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
815 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
816 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3
817 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1
818 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
819 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0
820 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v9, 16, v9
821 ; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v3
822 ; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v9
823 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v2
824 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
825 ; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
826 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v8
827 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
828 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
829 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
830 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
831 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
832 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
833 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
834 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
835 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
836 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
837 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
838 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
839 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
840 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
841 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
842 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
843 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
844 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
845 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
846 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
847 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
848 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
849 ; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
850 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
851 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
852 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
853 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
854 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
855 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
856 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
857 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
858 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
859 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
860 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
861 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
862 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
863 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
864 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
865 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
866 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
867 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
868 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
869 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
870 ; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
871 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
872 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
873 ; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
874 ; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
875 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
876 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
877 ; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
878 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
879 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
880 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
881 ; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
882 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
883 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
884 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
885 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
886 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
887 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
888 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
889 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
890 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
891 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
892 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
893 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
894 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
895 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
896 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
897 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
898 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
899 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
900 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
901 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
902 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
903 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
904 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
905 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
906 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
908 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i16:
909 ; GFX8-SDAG: ; %bb.0: ; %entry
910 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
911 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 1
912 ; GFX8-SDAG-NEXT: v_add_u16_e32 v5, 1, v0
913 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
914 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v8, 16, v2
915 ; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v1
916 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
917 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3
918 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v9, v0, v8
919 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v8, v0
920 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v6
921 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v10, v5, v2
922 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v11, v4, v3
923 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v6, v1
924 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v3, v4
925 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v5, v2, v5
926 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v8
927 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v6
928 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v5, v2
929 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v4, v3
930 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v9, v0
931 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1
932 ; GFX8-SDAG-NEXT: v_mad_u16 v7, v2, v10, v2
933 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, v4
934 ; GFX8-SDAG-NEXT: v_mad_u16 v6, v3, v11, v3
935 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5
936 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
937 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v7, v2, v7
938 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
939 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
940 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v6, v3, v6
941 ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v2, v1
942 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
944 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v4i16:
945 ; GFX8-GISEL: ; %bb.0: ; %entry
946 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
947 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 1
948 ; GFX8-GISEL-NEXT: v_add_u16_e32 v4, 1, v0
949 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
950 ; GFX8-GISEL-NEXT: v_add_u16_e32 v6, 1, v1
951 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
952 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
953 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3
954 ; GFX8-GISEL-NEXT: v_mad_u16 v8, v4, v2, v4
955 ; GFX8-GISEL-NEXT: v_mad_u16 v9, v0, v5, v0
956 ; GFX8-GISEL-NEXT: v_mad_u16 v10, v6, v3, v6
957 ; GFX8-GISEL-NEXT: v_mad_u16 v11, v1, v7, v1
958 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v12, v8, v2
959 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v13, v9, v5
960 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v4, v2, 1
961 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v5, 1
962 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v14, v10, v3
963 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v15, v11, v7
964 ; GFX8-GISEL-NEXT: v_mad_u16 v6, v6, v3, 1
965 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v7, 1
966 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v8, v2, 1
967 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v9, v5, 1
968 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v12, v4
969 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v13, v0
970 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v10, v3, 1
971 ; GFX8-GISEL-NEXT: v_mad_u16 v7, v11, v7, 1
972 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v14, v6
973 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v15, v1
974 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v4, v2
975 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
976 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
977 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v3
978 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
979 ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
980 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
982 ; GFX9-SDAG-LABEL: clpeak_imad_pat_v4i16:
983 ; GFX9-SDAG: ; %bb.0: ; %entry
984 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
985 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
986 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
987 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
988 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
989 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
990 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
991 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
992 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
993 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
994 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
995 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
996 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
997 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
998 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
999 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1000 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1001 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1003 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i16:
1004 ; GFX9-GISEL: ; %bb.0: ; %entry
1005 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1007 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1008 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1009 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1010 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1011 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1012 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1013 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1014 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1015 ; GFX9-GISEL-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
1016 ; GFX9-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1017 ; GFX9-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
1018 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1019 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1020 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1021 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1022 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1024 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i16:
1025 ; GFX10-SDAG: ; %bb.0: ; %entry
1026 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1028 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1029 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
1030 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
1031 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
1032 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
1033 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1034 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1035 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
1036 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
1037 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
1038 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
1039 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1040 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1041 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1042 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1043 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1045 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i16:
1046 ; GFX10-GISEL: ; %bb.0: ; %entry
1047 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1049 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1050 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1051 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1052 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1053 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1054 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1055 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1056 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1057 ; GFX10-GISEL-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
1058 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1059 ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
1060 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1061 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1062 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1063 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1064 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Several value names label a different opcode than they
; suggest (for example %add is a mul), so read the opcode, not the name.
; t = x + 1 in every lane
1066 %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1>
; a = t * y
1067 %add = mul <4 x i16> %y18, %y
; a + t
1068 %mul119 = add <4 x i16> %add, %y18
; b = (a + t) * y
1069 %add2 = mul <4 x i16> %mul119, %y
; a + 1
1070 %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
; b + 1
1071 %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
; b * (a + 1)
1072 %mul521 = mul <4 x i16> %add2, %add220
; b * (a + 1) * (b + 1)
1073 %add6 = mul <4 x i16> %mul521, %add422
; Scalar i16 form of the clpeak multiply-add chain, with zeroext arguments and
; a zeroext return type (the IR is at the bottom of this function, after the
; check lines). With
;   a = (%x + 1) * %y,  b = (%y + 1) * a
; the last value computed is (a + 1) * b * (b + 1).
;
; The check lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
; What the expected output below shows:
;   - SelectionDAG on GFX8, GFX9 and GFX10 targets emits the whole chain as
;     four 16-bit mads (v_mad_u16, or v_mad_legacy_u16 on GFX9) with no
;     separate add or multiply.
;   - GlobalISel keeps separate adds and multiplies; on GFX8 and GFX9 two of
;     the "+ 1" steps appear as mads with a constant 1 addend.
;   - GFX6/7 targets use 24-bit v_mul_u32_u24 / v_mad_u32_u24 with 0xffff masks.
;   - GFX6/7 and GFX10 output ends with a v_and_b32 0xffff on the result, while
;     GFX8 and GFX9 output does not.
;     NOTE(review): presumably this mask implements the zeroext return where
;     the 16-bit result may leave the high bits undefined - confirm.
1077 define zeroext i16 @clpeak_umad_pat_i16(i16 zeroext %x, i16 zeroext %y) {
1078 ; GFX67-SDAG-LABEL: clpeak_umad_pat_i16:
1079 ; GFX67-SDAG: ; %bb.0: ; %entry
1080 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1081 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1082 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1083 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
1084 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
1085 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1086 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1087 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
1088 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
1089 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1090 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
1091 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
1092 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v2, 1
1093 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1094 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1095 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
1096 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1097 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
1099 ; GFX67-GISEL-LABEL: clpeak_umad_pat_i16:
1100 ; GFX67-GISEL: ; %bb.0: ; %entry
1101 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1102 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1103 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1104 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
1105 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1106 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1107 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
1108 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
1109 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1110 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1111 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
1112 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1113 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1114 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1115 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1116 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
1117 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1118 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
1120 ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16:
1121 ; GFX8-SDAG: ; %bb.0: ; %entry
1122 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
1124 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
1125 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
1126 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
1127 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1129 ; GFX8-GISEL-LABEL: clpeak_umad_pat_i16:
1130 ; GFX8-GISEL: ; %bb.0: ; %entry
1131 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1132 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
1133 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
1134 ; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
1135 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
1136 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
1137 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
1138 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1
1139 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
1140 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1142 ; GFX9-SDAG-LABEL: clpeak_umad_pat_i16:
1143 ; GFX9-SDAG: ; %bb.0: ; %entry
1144 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1145 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
1146 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
1147 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
1148 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
1149 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1151 ; GFX9-GISEL-LABEL: clpeak_umad_pat_i16:
1152 ; GFX9-GISEL: ; %bb.0: ; %entry
1153 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1154 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
1155 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
1156 ; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
1157 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
1158 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
1159 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
1160 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1
1161 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
1162 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1164 ; GFX10-SDAG-LABEL: clpeak_umad_pat_i16:
1165 ; GFX10-SDAG: ; %bb.0: ; %entry
1166 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1167 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
1168 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
1169 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
1170 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
1171 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1172 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1174 ; GFX10-GISEL-LABEL: clpeak_umad_pat_i16:
1175 ; GFX10-GISEL: ; %bb.0: ; %entry
1176 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1177 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
1178 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
1179 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
1180 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
1181 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
1182 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
1183 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
1184 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
1185 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1186 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Several value names label a different opcode than they
; suggest (for example %add is a mul), so read the opcode, not the name.
; x + 1
1188 %conv33 = add i16 %x, 1
; a = (x + 1) * y
1189 %add = mul i16 %conv33, %y
; y + 1
1190 %conv434 = add i16 %y, 1
; b = (y + 1) * a
1191 %add8 = mul i16 %conv434, %add
; a + 1
1192 %conv1035 = add i16 %add, 1
; c = (a + 1) * b
1193 %add14 = mul i16 %conv1035, %add8
; b + 1
1194 %conv1636 = add i16 %add8, 1
; c * (b + 1)
1195 %add20 = mul i16 %add14, %conv1636
1199 define <2 x i16> @clpeak_umad_pat_v2i16(<2 x i16> %x, <2 x i16> %y) {
1200 ; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16:
1201 ; GFX67-SDAG: ; %bb.0: ; %entry
1202 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1203 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1204 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
1205 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1206 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1207 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
1208 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1209 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
1210 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1211 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
1212 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1213 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2
1214 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v4, v2, 1
1215 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3
1216 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
1217 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v5, v3, 1
1218 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
1219 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6
1220 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
1221 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5
1222 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4
1223 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7
1224 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
1225 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1226 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1227 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1228 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1229 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
1230 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1
1231 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
1233 ; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16:
1234 ; GFX67-GISEL: ; %bb.0: ; %entry
1235 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1236 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1237 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1238 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
1239 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
1240 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
1241 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
1242 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
1243 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1244 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1245 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1246 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
1247 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
1248 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
1249 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
1250 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1251 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1252 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1253 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
1254 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
1255 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1256 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1257 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
1258 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1259 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
1260 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1261 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1262 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
1263 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
1264 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
1265 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1266 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1267 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1268 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
1269 ; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
1270 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1271 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1272 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1273 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1274 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
1275 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
1276 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
1277 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1278 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1279 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1280 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
1281 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
1282 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
1284 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16:
1285 ; GFX8-SDAG: ; %bb.0: ; %entry
1286 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1
1288 ; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0
1289 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1290 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1291 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v0, v3
1292 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, v0
1293 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v2, v1
1294 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v2, v1, v2
1295 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v3
1296 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v2, v1
1297 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0
1298 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1
1299 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
1300 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1301 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
1302 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
1303 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1305 ; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16:
1306 ; GFX8-GISEL: ; %bb.0: ; %entry
1307 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1308 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1
1309 ; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0
1310 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1311 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1312 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2
1313 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0
1314 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v1
1315 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
1316 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v2, v1, 1
1317 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
1318 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1
1319 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
1320 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v2
1321 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0
1322 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v2, v1
1323 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1324 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
1325 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1327 ; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16:
1328 ; GFX9-SDAG: ; %bb.0: ; %entry
1329 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1330 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1331 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
1332 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
1333 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1334 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
1335 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
1336 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1337 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1338 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1340 ; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16:
1341 ; GFX9-GISEL: ; %bb.0: ; %entry
1342 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1343 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1344 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
1345 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
1346 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1347 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
1348 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
1349 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1350 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1351 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16:
1354 ; GFX10-SDAG: ; %bb.0: ; %entry
1355 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1357 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
1358 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
1359 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1360 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
1361 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
1362 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1363 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1364 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1366 ; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16:
1367 ; GFX10-GISEL: ; %bb.0: ; %entry
1368 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1369 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1370 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
1371 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
1372 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1373 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
1374 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
1375 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
1376 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1377 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1379 %y18 = add <2 x i16> %x, <i16 1, i16 1>
1380 %add = mul <2 x i16> %y18, %y
1381 %mul119 = add <2 x i16> %add, %y18
1382 %add2 = mul <2 x i16> %mul119, %y
1383 %add220 = add <2 x i16> %add, <i16 1, i16 1>
1384 %add422 = add <2 x i16> %add2, <i16 1, i16 1>
1385 %mul521 = mul <2 x i16> %add2, %add220
1386 %add6 = mul <2 x i16> %mul521, %add422
; <3 x i16> variant of the clpeak integer mad pattern: a chain of vector adds
; and muls over %x and %y (the IR is at the end of this function). The
; per-target check lines below are autogenerated (see the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1390 define <3 x i16> @clpeak_umad_pat_v3i16(<3 x i16> %x, <3 x i16> %y) {
1391 ; GFX67-SDAG-LABEL: clpeak_umad_pat_v3i16:
1392 ; GFX67-SDAG: ; %bb.0: ; %entry
1393 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1394 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1395 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1396 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v0
1397 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v1
1398 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1399 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
1400 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1401 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v9, v8, v4
1402 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v6, v3, v0
1403 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v8, v4, v1
1404 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v6, v6, v3, 1
1405 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v2
1406 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
1407 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1408 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1409 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
1410 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v9
1411 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v7, v5, v2
1412 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v0, v3
1413 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
1414 ; GFX67-SDAG-NEXT: v_or_b32_e32 v6, v9, v6
1415 ; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
1416 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v3, 1
1417 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1418 ; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, s4, v6
1419 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1420 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
1421 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v5
1422 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v7, v5, 1
1423 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v3, v0
1424 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v6
1425 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1426 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
1427 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v5, 1
1428 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v8
1429 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
1430 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1431 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
1432 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
1433 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v5, v6
1434 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
1435 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1436 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v0
1437 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
1438 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1439 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1440 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1441 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v5, v0
1442 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
1443 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v3, v2
1444 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
1446 ; GFX67-GISEL-LABEL: clpeak_umad_pat_v3i16:
1447 ; GFX67-GISEL: ; %bb.0: ; %entry
1448 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1449 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1450 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1451 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v0
1452 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1453 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1454 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v6, v6, v3
1455 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v1
1456 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1457 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v7, v7, v4
1458 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v2
1459 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
1460 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
1461 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v5
1462 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
1463 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1464 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
1465 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
1466 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1467 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
1468 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
1469 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1470 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
1471 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
1472 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1473 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1474 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
1475 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
1476 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
1477 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
1478 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1479 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v4
1480 ; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v2
1481 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1482 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1483 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v5
1484 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
1485 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1486 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v6
1487 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
1488 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1489 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v7
1490 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
1491 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1492 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v8
1493 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v3
1494 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
1496 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v3i16:
1497 ; GFX8-SDAG: ; %bb.0: ; %entry
1498 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1499 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v5, 1
1500 ; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v0
1501 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1502 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1503 ; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1
1504 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v6, v0, v5
1505 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v5, v0
1506 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v3
1507 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v8, v4, v2
1508 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v2, v4
1509 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
1510 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v5
1511 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
1512 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v4, v2
1513 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v6, v0
1514 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v2, v8, v2
1515 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v3, v0, v3
1516 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1
1517 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1518 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v2, v4
1519 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5
1520 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
1521 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1523 ; GFX8-GISEL-LABEL: clpeak_umad_pat_v3i16:
1524 ; GFX8-GISEL: ; %bb.0: ; %entry
1525 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1526 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v6, 1
1527 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
1528 ; GFX8-GISEL-NEXT: v_add_u16_e32 v5, 1, v0
1529 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1530 ; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1
1531 ; GFX8-GISEL-NEXT: v_mad_u16 v6, v5, v2, v5
1532 ; GFX8-GISEL-NEXT: v_mad_u16 v7, v0, v4, v0
1533 ; GFX8-GISEL-NEXT: v_mad_u16 v8, v1, v3, v1
1534 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v9, v6, v2
1535 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v10, v7, v4
1536 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v5, v2, 1
1537 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1
1538 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v11, v8, v3
1539 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
1540 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v6, v2, 1
1541 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v7, v4, 1
1542 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v5, v9, v5
1543 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v10, v0
1544 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v8, v3, 1
1545 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v11, v1
1546 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v5, v2
1547 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1548 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
1549 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3
1550 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1552 ; GFX9-SDAG-LABEL: clpeak_umad_pat_v3i16:
1553 ; GFX9-SDAG: ; %bb.0: ; %entry
1554 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1555 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
1556 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1557 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1558 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1559 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
1560 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
1561 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1562 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1563 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
1564 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1
1565 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
1566 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
1567 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1568 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1569 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1570 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1571 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1573 ; GFX9-GISEL-LABEL: clpeak_umad_pat_v3i16:
1574 ; GFX9-GISEL: ; %bb.0: ; %entry
1575 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1576 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1577 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1
1578 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1579 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1580 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1581 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1582 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1583 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1584 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1585 ; GFX9-GISEL-NEXT: v_pk_add_u16 v3, v5, 1
1586 ; GFX9-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1587 ; GFX9-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
1588 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1589 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1590 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1591 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1592 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1594 ; GFX10-SDAG-LABEL: clpeak_umad_pat_v3i16:
1595 ; GFX10-SDAG: ; %bb.0: ; %entry
1596 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1597 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1
1598 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1599 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
1600 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
1601 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
1602 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
1603 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1604 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1605 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
1606 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1
1607 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1
1608 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
1609 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1610 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1611 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1612 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1613 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1615 ; GFX10-GISEL-LABEL: clpeak_umad_pat_v3i16:
1616 ; GFX10-GISEL: ; %bb.0: ; %entry
1617 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1618 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1619 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1
1620 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1621 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1622 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1623 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1624 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1625 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1626 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1627 ; GFX10-GISEL-NEXT: v_pk_add_u16 v3, v5, 1
1628 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1629 ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1
1630 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1631 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1632 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1633 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1634 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. Every operation is on <3 x i16>, and each constant operand is
; a splat of 1. The SSA names do not always match the opcode that defines
; them: %add, %add15 and %add33 are results of mul, and %mul1249 is the result
; of an add.
1636 %y48 = add <3 x i16> %x, <i16 1, i16 1, i16 1>
1637 %add = mul <3 x i16> %y48, %y
1638 %mul1249 = add <3 x i16> %add, %y48
1639 %add15 = mul <3 x i16> %mul1249, %y
1640 %add1550 = add <3 x i16> %add, <i16 1, i16 1, i16 1>
1641 %add2452 = add <3 x i16> %add15, <i16 1, i16 1, i16 1>
1642 %mul3051 = mul <3 x i16> %add15, %add1550
1643 %add33 = mul <3 x i16> %mul3051, %add2452
1644 ret <3 x i16> %add33
1647 define <4 x i16> @clpeak_umad_pat_v4i16(<4 x i16> %x, <4 x i16> %y) {
1648 ; GFX67-SDAG-LABEL: clpeak_umad_pat_v4i16:
1649 ; GFX67-SDAG: ; %bb.0: ; %entry
1650 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1651 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1652 ; GFX67-SDAG-NEXT: v_and_b32_e32 v11, 0xffff, v3
1653 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
1654 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1655 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v11, v7, v3
1656 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1657 ; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v2
1658 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1659 ; GFX67-SDAG-NEXT: v_and_b32_e32 v6, 0xffff, v6
1660 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1661 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
1662 ; GFX67-SDAG-NEXT: v_and_b32_e32 v10, 0xffff, v1
1663 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
1664 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v5
1665 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v13, v11, v7
1666 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v7
1667 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v7, v9, v6, 1
1668 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v12, v10, v5
1669 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v9, v6, v2
1670 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v8, v4, v0
1671 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v10, v5, v1
1672 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
1673 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v13
1674 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v8, v8, v4, 1
1675 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1676 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1677 ; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v9, v7
1678 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
1679 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v9, 16, v12
1680 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1681 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v10, v0, v4
1682 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
1683 ; GFX67-SDAG-NEXT: s_mov_b32 s4, 0x10000
1684 ; GFX67-SDAG-NEXT: v_or_b32_e32 v8, v9, v8
1685 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
1686 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v2, v6
1687 ; GFX67-SDAG-NEXT: v_add_i32_e32 v8, vcc, s4, v8
1688 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v6, 1
1689 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1690 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v1
1691 ; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, s4, v7
1692 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1693 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v3
1694 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
1695 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v8
1696 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1697 ; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v6, v2
1698 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v7
1699 ; GFX67-SDAG-NEXT: v_and_b32_e32 v9, 0xffff, v10
1700 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v8
1701 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v4
1702 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v5
1703 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v7
1704 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1705 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, s4, v2
1706 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, s4, v0
1707 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v9, v8
1708 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v4, v5
1709 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
1710 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
1711 ; GFX67-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
1712 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v8
1713 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
1714 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
1715 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
1716 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
1717 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
1718 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v7, v0
1719 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v5
1720 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
1721 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v6
1722 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
1724 ; GFX67-GISEL-LABEL: clpeak_umad_pat_v4i16:
1725 ; GFX67-GISEL: ; %bb.0: ; %entry
1726 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1727 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1728 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1729 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3
1730 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v1
1731 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
1732 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v0
1733 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v9, 16, v9
1734 ; GFX67-GISEL-NEXT: v_and_b32_e32 v10, 0xffff, v3
1735 ; GFX67-GISEL-NEXT: v_or_b32_e32 v8, v8, v9
1736 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v2
1737 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v10, 16, v10
1738 ; GFX67-GISEL-NEXT: v_or_b32_e32 v9, v9, v10
1739 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v8
1740 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
1741 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v8
1742 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1743 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v10, v10, v5
1744 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v11, 16, v9
1745 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v8, v8, v4
1746 ; GFX67-GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
1747 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
1748 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
1749 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v10, v1
1750 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v9, v9, v6
1751 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v11, v11, v7
1752 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
1753 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1754 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v9, v2
1755 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
1756 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1757 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1758 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
1759 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v2
1760 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
1761 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2
1762 ; GFX67-GISEL-NEXT: v_or_b32_e32 v1, v1, v2
1763 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0
1764 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1765 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v5
1766 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v10
1767 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
1768 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
1769 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1770 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
1771 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
1772 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v6
1773 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v7
1774 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v9
1775 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
1776 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1777 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
1778 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
1779 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v6
1780 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v7
1781 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
1782 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v2
1783 ; GFX67-GISEL-NEXT: v_or_b32_e32 v5, v5, v6
1784 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
1785 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v7
1786 ; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v1
1787 ; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v3
1788 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
1789 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v7, 16, v7
1790 ; GFX67-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
1791 ; GFX67-GISEL-NEXT: v_and_b32_e32 v7, 0xffff, v8
1792 ; GFX67-GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9
1793 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8
1794 ; GFX67-GISEL-NEXT: v_or_b32_e32 v7, v7, v8
1795 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v8, 16, v4
1796 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1797 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
1798 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v4
1799 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1800 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
1801 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v5
1802 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v9, 16, v5
1803 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v8
1804 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v1, v4
1805 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
1806 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v1, v9
1807 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v6
1808 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
1809 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v7
1810 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
1811 ; GFX67-GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
1812 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v2, v1
1813 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v4
1814 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v7
1815 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
1816 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v6
1817 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v2, v4
1818 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v3, v5
1819 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
1821 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v4i16:
1822 ; GFX8-SDAG: ; %bb.0: ; %entry
1823 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1824 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v6, 1
1825 ; GFX8-SDAG-NEXT: v_add_u16_e32 v5, 1, v0
1826 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1827 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v8, 16, v2
1828 ; GFX8-SDAG-NEXT: v_add_u16_e32 v4, 1, v1
1829 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1830 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3
1831 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v9, v0, v8
1832 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v8, v0
1833 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v1, v6
1834 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v10, v5, v2
1835 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v11, v4, v3
1836 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v6, v1
1837 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v4, v3, v4
1838 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v5, v2, v5
1839 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v8
1840 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v6
1841 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v5, v2
1842 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v4, v3
1843 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v9, v0
1844 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v1, v7, v1
1845 ; GFX8-SDAG-NEXT: v_mad_u16 v7, v2, v10, v2
1846 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, v4
1847 ; GFX8-SDAG-NEXT: v_mad_u16 v6, v3, v11, v3
1848 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, v5
1849 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
1850 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v7, v2, v7
1851 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
1852 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
1853 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v6, v3, v6
1854 ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v2, v1
1855 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
1857 ; GFX8-GISEL-LABEL: clpeak_umad_pat_v4i16:
1858 ; GFX8-GISEL: ; %bb.0: ; %entry
1859 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1860 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v5, 1
1861 ; GFX8-GISEL-NEXT: v_add_u16_e32 v4, 1, v0
1862 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1863 ; GFX8-GISEL-NEXT: v_add_u16_e32 v6, 1, v1
1864 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1865 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v2
1866 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v3
1867 ; GFX8-GISEL-NEXT: v_mad_u16 v8, v4, v2, v4
1868 ; GFX8-GISEL-NEXT: v_mad_u16 v9, v0, v5, v0
1869 ; GFX8-GISEL-NEXT: v_mad_u16 v10, v6, v3, v6
1870 ; GFX8-GISEL-NEXT: v_mad_u16 v11, v1, v7, v1
1871 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v12, v8, v2
1872 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v13, v9, v5
1873 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v4, v2, 1
1874 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v5, 1
1875 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v14, v10, v3
1876 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v15, v11, v7
1877 ; GFX8-GISEL-NEXT: v_mad_u16 v6, v6, v3, 1
1878 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v7, 1
1879 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v8, v2, 1
1880 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v9, v5, 1
1881 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v12, v4
1882 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v13, v0
1883 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v10, v3, 1
1884 ; GFX8-GISEL-NEXT: v_mad_u16 v7, v11, v7, 1
1885 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v14, v6
1886 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v15, v1
1887 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v4, v2
1888 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1889 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
1890 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v6, v3
1891 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
1892 ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v2, v1
1893 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
1895 ; GFX9-SDAG-LABEL: clpeak_umad_pat_v4i16:
1896 ; GFX9-SDAG: ; %bb.0: ; %entry
1897 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1898 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1899 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1900 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1901 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1902 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v5, v1
1903 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v4, v0
1904 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1905 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1906 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v4, -1 op_sel_hi:[1,0]
1907 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v3, v5, -1 op_sel_hi:[1,0]
1908 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
1909 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
1910 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1911 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1912 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1913 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1914 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1916 ; GFX9-GISEL-LABEL: clpeak_umad_pat_v4i16:
1917 ; GFX9-GISEL: ; %bb.0: ; %entry
1918 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1919 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1920 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1921 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1922 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1923 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1924 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1925 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1926 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1927 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1928 ; GFX9-GISEL-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
1929 ; GFX9-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1930 ; GFX9-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
1931 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1932 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1933 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1934 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1935 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1937 ; GFX10-SDAG-LABEL: clpeak_umad_pat_v4i16:
1938 ; GFX10-SDAG: ; %bb.0: ; %entry
1939 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1940 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
1941 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
1942 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v4, v1, v3
1943 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v5, v0, v2
1944 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v4, v1
1945 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v5, v0
1946 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1947 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1948 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v5, -1 op_sel_hi:[1,0]
1949 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v3, v4, -1 op_sel_hi:[1,0]
1950 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v4, v1, -1 op_sel_hi:[1,0]
1951 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v5, v0, -1 op_sel_hi:[1,0]
1952 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1953 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1954 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v5
1955 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v1, v4
1956 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1958 ; GFX10-GISEL-LABEL: clpeak_umad_pat_v4i16:
1959 ; GFX10-GISEL: ; %bb.0: ; %entry
1960 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1961 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
1962 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
1963 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v4, v0, v2
1964 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v5, v1, v3
1965 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v4, v0
1966 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v5, v1
1967 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1968 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1969 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v4, 1 op_sel_hi:[1,0]
1970 ; GFX10-GISEL-NEXT: v_pk_add_u16 v3, v5, 1 op_sel_hi:[1,0]
1971 ; GFX10-GISEL-NEXT: v_pk_add_u16 v4, v0, 1 op_sel_hi:[1,0]
1972 ; GFX10-GISEL-NEXT: v_pk_add_u16 v5, v1, 1 op_sel_hi:[1,0]
1973 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
1974 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v3
1975 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v4
1976 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v1, v5
1977 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1979 %y18 = add <4 x i16> %x, <i16 1, i16 1, i16 1, i16 1>
1980 %add = mul <4 x i16> %y18, %y
1981 %mul119 = add <4 x i16> %add, %y18
1982 %add2 = mul <4 x i16> %mul119, %y
1983 %add220 = add <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
1984 %add422 = add <4 x i16> %add2, <i16 1, i16 1, i16 1, i16 1>
1985 %mul521 = mul <4 x i16> %add2, %add220
1986 %add6 = mul <4 x i16> %mul521, %add422
; <2 x i32> variant of the clpeak integer-mad pattern. With
;   a = (x + 1) * y
;   b = (a + (x + 1)) * y
; the body computes b * (a + 1) * (b + 1) element-wise (%add6).
; In the autogenerated assertions below, the SelectionDAG outputs turn each
; "multiply by (v + 1)" into multiply-then-add: a separate v_mul_lo_u32 plus
; add on gfx6/7/8, and v_mad_u64_u32 on gfx900, gfx90a and gfx1030. The
; GlobalISel outputs keep the explicit add-of-1 followed by v_mul_lo_u32.
1990 define <2 x i32> @clpeak_imad_pat_v2i32(<2 x i32> %x, <2 x i32> %y) {
1991 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32:
1992 ; GFX67-SDAG: ; %bb.0: ; %entry
1993 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1994 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
1995 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
1996 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
1997 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
1998 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v4, v0
1999 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v5, v1
2000 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
2001 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
2002 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4
2003 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5
2004 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v0
2005 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v1
2006 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
2007 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
2008 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v2
2009 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v3
2010 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
2012 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32:
2013 ; GFX67-GISEL: ; %bb.0: ; %entry
2014 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2015 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2016 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2017 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
2018 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
2019 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
2020 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
2021 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2022 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2023 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
2024 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
2025 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0
2026 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2027 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3
2028 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2029 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2030 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1
2031 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
2033 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32:
2034 ; GFX8-SDAG: ; %bb.0: ; %entry
2035 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2036 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2037 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2038 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
2039 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
2040 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v4, v0
2041 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v5, v1
2042 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
2043 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
2044 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4
2045 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5
2046 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v0
2047 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v1
2048 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
2049 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
2050 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v2
2051 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v3
2052 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2054 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32:
2055 ; GFX8-GISEL: ; %bb.0: ; %entry
2056 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2057 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2058 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2059 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
2060 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
2061 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v0
2062 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v5, v1
2063 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2064 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2065 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v4
2066 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v5
2067 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v0
2068 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2069 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3
2070 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2071 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2072 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1
2073 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2075 ; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32:
2076 ; GFX900-SDAG: ; %bb.0: ; %entry
2077 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2078 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2079 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
2080 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2081 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
2082 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v4, v0
2083 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
2084 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v5, v1
2085 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3
2086 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v4, v[0:1]
2087 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3]
2088 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
2089 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5]
2090 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2092 ; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32:
2093 ; GFX900-GISEL: ; %bb.0: ; %entry
2094 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2095 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2096 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2097 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
2098 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
2099 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
2100 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v5, v1
2101 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2102 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2103 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
2104 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
2105 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2
2106 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3
2107 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2108 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2109 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
2110 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
2111 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2113 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i32:
2114 ; GFX90A-SDAG: ; %bb.0: ; %entry
2115 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2116 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2117 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2118 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v0, v2
2119 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3
2120 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v4, v1
2121 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v6, v0
2122 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
2123 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3
2124 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3]
2125 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1]
2126 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
2127 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
2128 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v2
2129 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
2131 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32:
2132 ; GFX90A-GISEL: ; %bb.0: ; %entry
2133 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2134 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2135 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2136 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
2137 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
2138 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
2139 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v5, v1
2140 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2141 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2142 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
2143 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
2144 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v0
2145 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v1
2146 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2147 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2148 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2149 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2150 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
2152 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32:
2153 ; GFX10-SDAG: ; %bb.0: ; %entry
2154 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2155 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
2156 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
2157 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
2158 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
2159 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0
2160 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1
2161 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
2162 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v3
2163 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1]
2164 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3]
2165 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
2166 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5]
2167 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2169 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32:
2170 ; GFX10-GISEL: ; %bb.0: ; %entry
2171 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2172 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2173 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2174 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
2175 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
2176 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0
2177 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1
2178 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2179 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
2180 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4
2181 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5
2182 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2
2183 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3
2184 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2185 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2186 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
2187 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
2188 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; a = (x + 1) * y
2190 %y18 = add <2 x i32> %x, <i32 1, i32 1>
2191 %add = mul <2 x i32> %y18, %y
; b = (a + (x + 1)) * y
2192 %mul119 = add <2 x i32> %add, %y18
2193 %add2 = mul <2 x i32> %mul119, %y
; a + 1 and b + 1: the "+ 1" operands feeding the two final multiplies.
2194 %add220 = add <2 x i32> %add, <i32 1, i32 1>
2195 %add422 = add <2 x i32> %add2, <i32 1, i32 1>
; %add6 = b * (a + 1) * (b + 1)
2196 %mul521 = mul <2 x i32> %add2, %add220
2197 %add6 = mul <2 x i32> %mul521, %add422
; <3 x i32> variant of the clpeak integer-mad pattern. With
;   a = (x + 1) * y
;   b = (a + (x + 1)) * y
; the function returns b * (a + 1) * (b + 1) element-wise (%add33).
; In the autogenerated assertions below, the SelectionDAG outputs turn each
; "multiply by (v + 1)" into multiply-then-add: a separate v_mul_lo_u32 plus
; add on gfx6/7/8, and v_mad_u64_u32 on gfx900, gfx90a and gfx1030. The
; GlobalISel outputs keep the explicit add-of-1 followed by v_mul_lo_u32.
2201 define <3 x i32> @clpeak_imad_pat_v3i32(<3 x i32> %x, <3 x i32> %y) {
2202 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v3i32:
2203 ; GFX67-SDAG: ; %bb.0: ; %entry
2204 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2205 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2206 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2207 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2208 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5
2209 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3
2210 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4
2211 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v6, v2
2212 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v7, v0
2213 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v8, v1
2214 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v2, v5
2215 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3
2216 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4
2217 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v2, v6
2218 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v0, v7
2219 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v8
2220 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v2
2221 ; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v0
2222 ; GFX67-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v1
2223 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v4, v0
2224 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v5, v1
2225 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2
2226 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v4
2227 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v5
2228 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v3
2229 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
2231 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v3i32:
2232 ; GFX67-GISEL: ; %bb.0: ; %entry
2233 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2234 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2235 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2236 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2237 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3
2238 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4
2239 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5
2240 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
2241 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v7, v1
2242 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v8, v2
2243 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2244 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2245 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2246 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v6
2247 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v7
2248 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v8
2249 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
2250 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v1
2251 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2252 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2253 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5
2254 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2255 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6
2256 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7
2257 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2
2258 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
2260 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v3i32:
2261 ; GFX8-SDAG: ; %bb.0: ; %entry
2262 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2263 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2264 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2265 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2266 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5
2267 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3
2268 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4
2269 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v6, v2
2270 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v7, v0
2271 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v8, v1
2272 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v2, v5
2273 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3
2274 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4
2275 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v2, v6
2276 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v0, v7
2277 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v8
2278 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v2
2279 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v0
2280 ; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v5, v1
2281 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v4, v0
2282 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v5, v1
2283 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2
2284 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v4
2285 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v5
2286 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v3
2287 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2289 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v3i32:
2290 ; GFX8-GISEL: ; %bb.0: ; %entry
2291 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2292 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2293 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2294 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2295 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3
2296 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4
2297 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5
2298 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v6, v0
2299 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v7, v1
2300 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v8, v2
2301 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2302 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2303 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2304 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v6
2305 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v7
2306 ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 1, v8
2307 ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0
2308 ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v1
2309 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2310 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2311 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5
2312 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2313 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6
2314 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7
2315 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2
2316 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2318 ; GFX900-SDAG-LABEL: clpeak_imad_pat_v3i32:
2319 ; GFX900-SDAG: ; %bb.0: ; %entry
2320 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2321 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v2
2322 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5
2323 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2324 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2325 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3
2326 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4
2327 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v6, v2
2328 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5
2329 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v7, v0
2330 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v8, v1
2331 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3
2332 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4
2333 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, v[5:6]
2334 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v7, v[0:1]
2335 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v2, v8, v[2:3]
2336 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
2337 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v7, v2, v[7:8]
2338 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v3, v5, v[3:4]
2339 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2341 ; GFX900-GISEL-LABEL: clpeak_imad_pat_v3i32:
2342 ; GFX900-GISEL: ; %bb.0: ; %entry
2343 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2344 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2345 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2346 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v2
2347 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3
2348 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4
2349 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5
2350 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v6, v0
2351 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v7, v1
2352 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v8, v2
2353 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2354 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2355 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2356 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v6
2357 ; GFX900-GISEL-NEXT: v_add_u32_e32 v4, 1, v7
2358 ; GFX900-GISEL-NEXT: v_add_u32_e32 v5, 1, v8
2359 ; GFX900-GISEL-NEXT: v_add_u32_e32 v6, 1, v0
2360 ; GFX900-GISEL-NEXT: v_add_u32_e32 v7, 1, v1
2361 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2362 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2363 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v2, v5
2364 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v2
2365 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6
2366 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7
2367 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v3, v2
2368 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2370 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_v3i32:
2371 ; GFX90A-SDAG: ; %bb.0: ; %entry
2372 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2373 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2374 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2375 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v2
2376 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v0, v3
2377 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v1, v4
2378 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v2, v5
2379 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v8, v1
2380 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v10, v0
2381 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v7, v6, v2
2382 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3
2383 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4
2384 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v7, v5
2385 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v8, v[2:3]
2386 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v0, v10, v[0:1]
2387 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v6, v[4:5]
2388 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v0, v[10:11]
2389 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v8, v2, v[8:9]
2390 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v4, v[6:7]
2391 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v8
2392 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
2394 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_v3i32:
2395 ; GFX90A-GISEL: ; %bb.0: ; %entry
2396 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2397 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2398 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2399 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v2
2400 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3
2401 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4
2402 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5
2403 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v6, v0
2404 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v7, v1
2405 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v8, v2
2406 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2407 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2408 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2409 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v6
2410 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v7
2411 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v8
2412 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v6, 1, v0
2413 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v7, 1, v1
2414 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v8, 1, v2
2415 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2416 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2417 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2418 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v6
2419 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v7
2420 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v2, v8
2421 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
2423 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v3i32:
2424 ; GFX10-SDAG: ; %bb.0: ; %entry
2425 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2426 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
2427 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
2428 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2
2429 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3
2430 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v1, v4
2431 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v8, v2, v5
2432 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v6, v0
2433 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v7, v1
2434 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v9, v8, v2
2435 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v3
2436 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v4
2437 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v3, v9, v5
2438 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v0, v6, v[0:1]
2439 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v2, v7, v[2:3]
2440 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v3, v8, v[3:4]
2441 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v0, v[4:5]
2442 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v5, v2, v[5:6]
2443 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v6, v3, v[6:7]
2444 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2446 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v3i32:
2447 ; GFX10-GISEL: ; %bb.0: ; %entry
2448 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2449 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2450 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2451 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
2452 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v0, v3
2453 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v7, v1, v4
2454 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v8, v2, v5
2455 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v6, v0
2456 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v7, v1
2457 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v8, v2
2458 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v3
2459 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
2460 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
2461 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v6
2462 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v7
2463 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v8
2464 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3
2465 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v1, v4
2466 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v2, v5
2467 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2468 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2469 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
2470 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v3, v0
2471 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
2472 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2
2473 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; a = (x + 1) * y
2475 %y48 = add <3 x i32> %x, <i32 1, i32 1, i32 1>
2476 %add = mul <3 x i32> %y48, %y
; b = (a + (x + 1)) * y
2477 %mul1249 = add <3 x i32> %add, %y48
2478 %add15 = mul <3 x i32> %mul1249, %y
; a + 1 and b + 1: the "+ 1" operands feeding the two final multiplies.
2479 %add1550 = add <3 x i32> %add, <i32 1, i32 1, i32 1>
2480 %add2452 = add <3 x i32> %add15, <i32 1, i32 1, i32 1>
; result = b * (a + 1) * (b + 1)
2481 %mul3051 = mul <3 x i32> %add15, %add1550
2482 %add33 = mul <3 x i32> %mul3051, %add2452
2483 ret <3 x i32> %add33
2486 define <4 x i32> @clpeak_imad_pat_v4i32(<4 x i32> %x, <4 x i32> %y) {
2487 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v4i32:
2488 ; GFX67-SDAG: ; %bb.0: ; %entry
2489 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2490 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2491 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2492 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2493 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2494 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4
2495 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v9, v3, v7
2496 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5
2497 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v11, v2, v6
2498 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v8, v0
2499 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v9, v3
2500 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v11, v2
2501 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v10, v1
2502 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v3, v7
2503 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6
2504 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
2505 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v5
2506 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v3, v9
2507 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v2, v11
2508 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v6, v0, v8
2509 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v7, v1, v10
2510 ; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v3
2511 ; GFX67-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v2
2512 ; GFX67-SDAG-NEXT: v_add_i32_e32 v6, vcc, v6, v0
2513 ; GFX67-SDAG-NEXT: v_add_i32_e32 v7, vcc, v7, v1
2514 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0
2515 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v7, v1
2516 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2
2517 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v4, v3
2518 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v6
2519 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v7
2520 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v5
2521 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v4
2522 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
2524 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v4i32:
2525 ; GFX67-GISEL: ; %bb.0: ; %entry
2526 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2527 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2528 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
2529 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
2530 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v3
2531 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4
2532 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
2533 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6
2534 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
2535 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v8, v0
2536 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v9, v1
2537 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v10, v2
2538 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v11, v3
2539 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2540 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2541 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2542 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2543 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v8
2544 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v9
2545 ; GFX67-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v10
2546 ; GFX67-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
2547 ; GFX67-GISEL-NEXT: v_add_i32_e32 v8, vcc, 1, v0
2548 ; GFX67-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v1
2549 ; GFX67-GISEL-NEXT: v_add_i32_e32 v10, vcc, 1, v2
2550 ; GFX67-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v3
2551 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2552 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2553 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2554 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2555 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
2556 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9
2557 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10
2558 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11
2559 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
2561 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v4i32:
2562 ; GFX8-SDAG: ; %bb.0: ; %entry
2563 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2564 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2565 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2566 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2567 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2568 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4
2569 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v3, v7
2570 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5
2571 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v11, v2, v6
2572 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v8, v0
2573 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v9, v3
2574 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v11, v2
2575 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v10, v1
2576 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v3, v7
2577 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6
2578 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
2579 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v5
2580 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v3, v9
2581 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v2, v11
2582 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v0, v8
2583 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v1, v10
2584 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v3
2585 ; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v5, v2
2586 ; GFX8-SDAG-NEXT: v_add_u32_e32 v6, vcc, v6, v0
2587 ; GFX8-SDAG-NEXT: v_add_u32_e32 v7, vcc, v7, v1
2588 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0
2589 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v7, v1
2590 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2
2591 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v4, v3
2592 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v6
2593 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v7
2594 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v5
2595 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v4
2596 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2598 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v4i32:
2599 ; GFX8-GISEL: ; %bb.0: ; %entry
2600 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2601 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2602 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
2603 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v2
2604 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v3
2605 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4
2606 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
2607 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6
2608 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
2609 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v8, v0
2610 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v9, v1
2611 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v10, v2
2612 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, v11, v3
2613 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2614 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2615 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2616 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2617 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v8
2618 ; GFX8-GISEL-NEXT: v_add_u32_e32 v5, vcc, 1, v9
2619 ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v10
2620 ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v11
2621 ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, 1, v0
2622 ; GFX8-GISEL-NEXT: v_add_u32_e32 v9, vcc, 1, v1
2623 ; GFX8-GISEL-NEXT: v_add_u32_e32 v10, vcc, 1, v2
2624 ; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v3
2625 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2626 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2627 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2628 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2629 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
2630 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9
2631 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10
2632 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11
2633 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2635 ; GFX900-SDAG-LABEL: clpeak_imad_pat_v4i32:
2636 ; GFX900-SDAG: ; %bb.0: ; %entry
2637 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2638 ; GFX900-SDAG-NEXT: v_add_u32_e32 v3, 1, v3
2639 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2640 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v2
2641 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v8, v3, v7
2642 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v0, v4
2643 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v2, v6
2644 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2645 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v1, v5
2646 ; GFX900-SDAG-NEXT: v_add_u32_e32 v3, v8, v3
2647 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v11, v0
2648 ; GFX900-SDAG-NEXT: v_add_u32_e32 v12, v9, v2
2649 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
2650 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v3, v7
2651 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6
2652 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v10, v1
2653 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5
2654 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v4, v8, v[4:5]
2655 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v3, v9, v[3:4]
2656 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v0, v11, v[0:1]
2657 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v2, v10, v[2:3]
2658 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v7, v0, v[7:8]
2659 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v8, v2, v[8:9]
2660 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v3, v[6:7]
2661 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v4, v[5:6]
2662 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2664 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v4i32:
2665 ; GFX9-GISEL: ; %bb.0: ; %entry
2666 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2667 ; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2668 ; GFX9-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
2669 ; GFX9-GISEL-NEXT: v_add_u32_e32 v2, 1, v2
2670 ; GFX9-GISEL-NEXT: v_add_u32_e32 v3, 1, v3
2671 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4
2672 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
2673 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6
2674 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
2675 ; GFX9-GISEL-NEXT: v_add_u32_e32 v0, v8, v0
2676 ; GFX9-GISEL-NEXT: v_add_u32_e32 v1, v9, v1
2677 ; GFX9-GISEL-NEXT: v_add_u32_e32 v2, v10, v2
2678 ; GFX9-GISEL-NEXT: v_add_u32_e32 v3, v11, v3
2679 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2680 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2681 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2682 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2683 ; GFX9-GISEL-NEXT: v_add_u32_e32 v4, 1, v8
2684 ; GFX9-GISEL-NEXT: v_add_u32_e32 v5, 1, v9
2685 ; GFX9-GISEL-NEXT: v_add_u32_e32 v6, 1, v10
2686 ; GFX9-GISEL-NEXT: v_add_u32_e32 v7, 1, v11
2687 ; GFX9-GISEL-NEXT: v_add_u32_e32 v8, 1, v0
2688 ; GFX9-GISEL-NEXT: v_add_u32_e32 v9, 1, v1
2689 ; GFX9-GISEL-NEXT: v_add_u32_e32 v10, 1, v2
2690 ; GFX9-GISEL-NEXT: v_add_u32_e32 v11, 1, v3
2691 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2692 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2693 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2694 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2695 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
2696 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v1, v1, v9
2697 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v2, v2, v10
2698 ; GFX9-GISEL-NEXT: v_mul_lo_u32 v3, v3, v11
2699 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
2701 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_v4i32:
2702 ; GFX90A-SDAG: ; %bb.0: ; %entry
2703 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2704 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v3
2705 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v2
2706 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
2707 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2708 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v14, v0, v4
2709 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v5
2710 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6
2711 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v3, v7
2712 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v8, v3
2713 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v9, v10, v2
2714 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v12, v1
2715 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v14, v0
2716 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
2717 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5
2718 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v9, v6
2719 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v3, v7
2720 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v6, v8, v[6:7]
2721 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v4, v10, v[4:5]
2722 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v2, v12, v[2:3]
2723 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v0, v14, v[0:1]
2724 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v14, v0, v[14:15]
2725 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v12, v2, v[12:13]
2726 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v4, v[10:11]
2727 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v6, v[8:9]
2728 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v12
2729 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v3, v4
2730 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
2732 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v4i32:
2733 ; GFX10-SDAG: ; %bb.0: ; %entry
2734 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2735 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
2736 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
2737 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v2
2738 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v3
2739 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v8, v0, v4
2740 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v1, v5
2741 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6
2742 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v3, v7
2743 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v8, v0
2744 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v9, v1
2745 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v12, v10, v2
2746 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
2747 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v4, v11, v3
2748 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v5
2749 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6
2750 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v4, v7
2751 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v0, v8, v[0:1]
2752 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[6:7], null, v2, v9, v[2:3]
2753 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v3, v10, v[3:4]
2754 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[8:9], null, v4, v11, v[4:5]
2755 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v5, v0, v[5:6]
2756 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v6, v2, v[6:7]
2757 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v7, v3, v[7:8]
2758 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v8, v4, v[8:9]
2759 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2761 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v4i32:
2762 ; GFX10-GISEL: ; %bb.0: ; %entry
2763 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2764 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2765 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2766 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
2767 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3
2768 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v8, v0, v4
2769 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v9, v1, v5
2770 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v10, v2, v6
2771 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v11, v3, v7
2772 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v8, v0
2773 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v9, v1
2774 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v10, v2
2775 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v11, v3
2776 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2777 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
2778 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v6
2779 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v3, v7
2780 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v4, 1, v8
2781 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v5, 1, v9
2782 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v6, 1, v10
2783 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v7, 1, v11
2784 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v8, 1, v0
2785 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
2786 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v1, v5
2787 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v2, v6
2788 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v3, v7
2789 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
2790 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v2
2791 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v3
2792 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v8
2793 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
2794 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2
2795 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3
2796 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
2798 %y18 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
2799 %add = mul <4 x i32> %y18, %y
2800 %mul119 = add <4 x i32> %add, %y18
2801 %add2 = mul <4 x i32> %mul119, %y
2802 %add220 = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
2803 %add422 = add <4 x i32> %add2, <i32 1, i32 1, i32 1, i32 1>
2804 %mul521 = mul <4 x i32> %add2, %add220
2805 %add6 = mul <4 x i32> %mul521, %add422
; clpeak_imad_pat_i24 - the clpeak integer multiply/add chain on operands that
; are first sign-extended from their low 24 bits (shl by 8, then ashr by 8).
; In the expected output below, every run begins with v_bfe_i32 ..., 0, 24 on
; both inputs and multiplies with the full 32-bit v_mul_lo_u32. The SelectionDAG
; runs for gfx900, gfx90a and gfx1030 emit the last two multiplies as
; v_mad_u64_u32, while the GlobalISel runs keep separate add and multiply
; instructions.
2809 define i32 @clpeak_imad_pat_i24(i32 %x, i32 %y) {
2810 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i24:
2811 ; GFX67-SDAG: ; %bb.0: ; %entry
2812 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2813 ; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
2814 ; GFX67-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
2815 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2816 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2817 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2818 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2819 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
2820 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0
2821 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
2822 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
2823 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
2825 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i24:
2826 ; GFX67-GISEL: ; %bb.0: ; %entry
2827 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2828 ; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
2829 ; GFX67-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
2830 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2831 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2832 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2833 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2834 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
2835 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
2836 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2837 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
2838 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
2840 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i24:
2841 ; GFX8-SDAG: ; %bb.0: ; %entry
2842 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2843 ; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
2844 ; GFX8-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
2845 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2846 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2847 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0
2848 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2849 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
2850 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0
2851 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
2852 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
2853 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
2855 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i24:
2856 ; GFX8-GISEL: ; %bb.0: ; %entry
2857 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2858 ; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
2859 ; GFX8-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
2860 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2861 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2862 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0
2863 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2864 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
2865 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
2866 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
2867 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
2868 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
2870 ; GFX900-SDAG-LABEL: clpeak_imad_pat_i24:
2871 ; GFX900-SDAG: ; %bb.0: ; %entry
2872 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2873 ; GFX900-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
2874 ; GFX900-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
2875 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2876 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2877 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
2878 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2879 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
2880 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
2881 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
2883 ; GFX900-GISEL-LABEL: clpeak_imad_pat_i24:
2884 ; GFX900-GISEL: ; %bb.0: ; %entry
2885 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2886 ; GFX900-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
2887 ; GFX900-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
2888 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2889 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2890 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
2891 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2892 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
2893 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
2894 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2895 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
2896 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
2898 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_i24:
2899 ; GFX90A-SDAG: ; %bb.0: ; %entry
2900 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2901 ; GFX90A-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
2902 ; GFX90A-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
2903 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
2904 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2905 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
2906 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2907 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
2908 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
2909 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
2911 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_i24:
2912 ; GFX90A-GISEL: ; %bb.0: ; %entry
2913 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2914 ; GFX90A-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
2915 ; GFX90A-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
2916 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
2917 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2918 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
2919 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2920 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
2921 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
2922 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2923 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
2924 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
2926 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i24:
2927 ; GFX10-SDAG: ; %bb.0: ; %entry
2928 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2929 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 24
2930 ; GFX10-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 24
2931 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
2932 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2933 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
2934 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2935 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
2936 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
2937 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
2939 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i24:
2940 ; GFX10-GISEL: ; %bb.0: ; %entry
2941 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2942 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 24
2943 ; GFX10-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 24
2944 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2945 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2946 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
2947 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2948 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
2949 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
2950 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
2951 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
2952 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Sign-extend the low 24 bits of %x and of %y (shift left 8, arithmetic shift right 8).
2954 %shl = shl i32 %x, 8
2955 %shr = ashr exact i32 %shl, 8
2956 %shl1 = shl i32 %y, 8
2957 %shr2 = ashr exact i32 %shl1, 8
; With x24 = %shr and y24 = %shr2, the chain below computes
;   %add  = y24 * (x24 + 1)
;   %add4 = (%add + (x24 + 1)) * y24
2958 %shr222 = add nsw i32 %shr, 1
2959 %add = mul i32 %shr2, %shr222
2960 %mul323 = add i32 %add, %shr222
2961 %add4 = mul i32 %mul323, %shr2
; Final value %add8 = %add4 * (%add + 1) * (%add4 + 1).
2962 %add424 = add i32 %add, 1
2963 %add626 = add i32 %add4, 1
2964 %mul725 = mul i32 %add4, %add424
2965 %add8 = mul i32 %mul725, %add626
; clpeak_imad_pat_u24 - the clpeak integer multiply/add chain on operands that
; are first masked to their low 24 bits (and with 16777215, i.e. 0xffffff).
; In the expected output below, every run begins with v_and_b32 0xffffff on both
; inputs and multiplies with the full 32-bit v_mul_lo_u32. The SelectionDAG runs
; for gfx900, gfx90a and gfx1030 emit the last two multiplies as v_mad_u64_u32,
; while the GlobalISel runs keep separate add and multiply instructions.
2969 define i32 @clpeak_imad_pat_u24(i32 %x, i32 %y) {
2970 ; GFX67-SDAG-LABEL: clpeak_imad_pat_u24:
2971 ; GFX67-SDAG: ; %bb.0: ; %entry
2972 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2973 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2974 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2975 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2976 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
2977 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2978 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
2979 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
2980 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0
2981 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
2982 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
2983 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
2985 ; GFX67-GISEL-LABEL: clpeak_imad_pat_u24:
2986 ; GFX67-GISEL: ; %bb.0: ; %entry
2987 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2988 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
2989 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
2990 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2991 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
2992 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
2993 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
2994 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
2995 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
2996 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
2997 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
2998 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
3000 ; GFX8-SDAG-LABEL: clpeak_imad_pat_u24:
3001 ; GFX8-SDAG: ; %bb.0: ; %entry
3002 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3003 ; GFX8-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3004 ; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3005 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
3006 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
3007 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0
3008 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
3009 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
3010 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0
3011 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
3012 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
3013 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
3015 ; GFX8-GISEL-LABEL: clpeak_imad_pat_u24:
3016 ; GFX8-GISEL: ; %bb.0: ; %entry
3017 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3018 ; GFX8-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3019 ; GFX8-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3020 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
3021 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
3022 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0
3023 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3024 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
3025 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
3026 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
3027 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
3028 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
3030 ; GFX900-SDAG-LABEL: clpeak_imad_pat_u24:
3031 ; GFX900-SDAG: ; %bb.0: ; %entry
3032 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3033 ; GFX900-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3034 ; GFX900-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3035 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
3036 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
3037 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
3038 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
3039 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
3040 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
3041 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3043 ; GFX900-GISEL-LABEL: clpeak_imad_pat_u24:
3044 ; GFX900-GISEL: ; %bb.0: ; %entry
3045 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3046 ; GFX900-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3047 ; GFX900-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3048 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
3049 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
3050 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
3051 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3052 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
3053 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
3054 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
3055 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
3056 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3058 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_u24:
3059 ; GFX90A-SDAG: ; %bb.0: ; %entry
3060 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3061 ; GFX90A-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3062 ; GFX90A-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3063 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
3064 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
3065 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
3066 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
3067 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
3068 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
3069 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
3071 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_u24:
3072 ; GFX90A-GISEL: ; %bb.0: ; %entry
3073 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3074 ; GFX90A-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3075 ; GFX90A-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3076 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
3077 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
3078 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
3079 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3080 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
3081 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
3082 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3083 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
3084 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
3086 ; GFX10-SDAG-LABEL: clpeak_imad_pat_u24:
3087 ; GFX10-SDAG: ; %bb.0: ; %entry
3088 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3089 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3090 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3091 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
3092 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v1, v0
3093 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
3094 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
3095 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
3096 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
3097 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3099 ; GFX10-GISEL-LABEL: clpeak_imad_pat_u24:
3100 ; GFX10-GISEL: ; %bb.0: ; %entry
3101 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3102 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffffff, v0
3103 ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffffff, v1
3104 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
3105 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v1, v0
3106 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
3107 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3108 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
3109 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
3110 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
3111 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
3112 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Keep only the low 24 bits of %x and of %y (16777215 is 0xffffff).
3114 %shl = and i32 %x, 16777215
3115 %shl1 = and i32 %y, 16777215
; With x24 = %shl and y24 = %shl1, the chain below computes
;   %add  = y24 * (x24 + 1)
;   %add4 = (%add + (x24 + 1)) * y24
3116 %shl122 = add nuw nsw i32 %shl, 1
3117 %add = mul i32 %shl1, %shl122
3118 %mul323 = add i32 %add, %shl122
3119 %add4 = mul i32 %mul323, %shl1
; Final value %add8 = %add4 * (%add + 1) * (%add4 + 1).
3120 %add424 = add i32 %add, 1
3121 %add626 = add i32 %add4, 1
3122 %mul725 = mul i32 %add4, %add424
3123 %add8 = mul i32 %mul725, %add626
; clpeak_imad_pat_i8 - the clpeak add-one/multiply chain carried out entirely in
; i8, with signext arguments and a signext return value.
; In the expected output below, the gfx600/gfx700 runs mask operands with 0xff
; and use the 24-bit v_mul_u32_u24 (plus v_mad_u32_u24 under SelectionDAG). The
; SelectionDAG runs for gfx803, gfx900/gfx90a and gfx1030 reduce the chain to
; four 16-bit mad instructions (v_mad_u16 or v_mad_legacy_u16). Every run ends
; with v_bfe_i32 v0, v0, 0, 8 before returning, which sign-extends the i8 result.
3127 define signext i8 @clpeak_imad_pat_i8(i8 signext %x, i8 signext %y) {
3128 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i8:
3129 ; GFX67-SDAG: ; %bb.0: ; %entry
3130 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3131 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3132 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3133 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v1
3134 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
3135 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3136 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3137 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
3138 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
3139 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
3140 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3141 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v4
3142 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3143 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
3144 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3145 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3146 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3147 ; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
3148 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
3150 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i8:
3151 ; GFX67-GISEL: ; %bb.0: ; %entry
3152 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3153 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3154 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3155 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
3156 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3157 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3158 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
3159 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v0
3160 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
3161 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3162 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3163 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v1
3164 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3165 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3166 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3167 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
3168 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
3169 ; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
3170 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
3172 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i8:
3173 ; GFX8-SDAG: ; %bb.0: ; %entry
3174 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3175 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
3176 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
3177 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
3178 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
3179 ; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
3180 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
3182 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i8:
3183 ; GFX8-GISEL: ; %bb.0: ; %entry
3184 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3185 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
3186 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
3187 ; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
3188 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
3189 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
3190 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
3191 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v3, v2, 1
3192 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
3193 ; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
3194 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
3196 ; GFX9-SDAG-LABEL: clpeak_imad_pat_i8:
3197 ; GFX9-SDAG: ; %bb.0: ; %entry
3198 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3199 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
3200 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
3201 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
3202 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
3203 ; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
3204 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3206 ; GFX9-GISEL-LABEL: clpeak_imad_pat_i8:
3207 ; GFX9-GISEL: ; %bb.0: ; %entry
3208 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3209 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
3210 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
3211 ; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
3212 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
3213 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
3214 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v4
3215 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v3, v2, 1
3216 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
3217 ; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
3218 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3220 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i8:
3221 ; GFX10-SDAG: ; %bb.0: ; %entry
3222 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3223 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
3224 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
3225 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
3226 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
3227 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 8
3228 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3230 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i8:
3231 ; GFX10-GISEL: ; %bb.0: ; %entry
3232 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3233 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
3234 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
3235 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
3236 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
3237 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
3238 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
3239 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
3240 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
3241 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 8
3242 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Each step multiplies by (previous value + 1), all in i8
;   %add   = (%x + 1) * %y
;   %add8  = (%y + 1) * %add
;   %add14 = (%add + 1) * %add8
;   %add20 = %add14 * (%add8 + 1)
3244 %conv33 = add i8 %x, 1
3245 %add = mul i8 %conv33, %y
3246 %conv434 = add i8 %y, 1
3247 %add8 = mul i8 %conv434, %add
3248 %conv1035 = add i8 %add, 1
3249 %add14 = mul i8 %conv1035, %add8
3250 %conv1636 = add i8 %add8, 1
3251 %add20 = mul i8 %add14, %conv1636
; <2 x i8> variant of the clpeak integer multiply-add pattern test.
; The assertions that follow the define line are autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of this file);
; regenerate them with that script rather than editing them by hand.
3255 define <2 x i8> @clpeak_imad_pat_v2i8(<2 x i8> %x, <2 x i8> %y) {
3256 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i8:
3257 ; GFX67-SDAG: ; %bb.0: ; %entry
3258 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3259 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3260 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xff, v1
3261 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
3262 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3263 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
3264 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v0
3265 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xff, v2
3266 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3267 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v5, v3
3268 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
3269 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
3270 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v4, v2, 1
3271 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3272 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
3273 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v4, 8, v6
3274 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v5, v0, v2
3275 ; GFX67-SDAG-NEXT: v_or_b32_e32 v3, v4, v3
3276 ; GFX67-SDAG-NEXT: s_movk_i32 s4, 0x100
3277 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
3278 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, s4, v3
3279 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3280 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v1
3281 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v2, v0
3282 ; GFX67-SDAG-NEXT: v_bfe_u32 v2, v3, 8, 8
3283 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xff, v5
3284 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
3285 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3286 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 0x100, v0
3287 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v4, v3
3288 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v2
3289 ; GFX67-SDAG-NEXT: v_bfe_u32 v2, v0, 8, 8
3290 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xff, v3
3291 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xff, v0
3292 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3293 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
3294 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v2
3295 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
3297 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i8:
3298 ; GFX67-GISEL: ; %bb.0: ; %entry
3299 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3300 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3301 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
3302 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xff, v0
3303 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
3304 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
3305 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xff, v1
3306 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3
3307 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
3308 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
3309 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
3310 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3311 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3312 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
3313 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
3314 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
3315 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
3316 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0
3317 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3318 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2
3319 ; GFX67-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v1
3320 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3321 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
3322 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3
3323 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
3324 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xff, v0
3325 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v4
3326 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
3327 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1
3328 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5
3329 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
3330 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
3332 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i8:
3333 ; GFX8-SDAG: ; %bb.0: ; %entry
3334 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3335 ; GFX8-SDAG-NEXT: v_add_u16_e32 v1, 1, v1
3336 ; GFX8-SDAG-NEXT: v_add_u16_e32 v0, 1, v0
3337 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v1, v3
3338 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
3339 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v0, v2
3340 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0
3341 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
3342 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2
3343 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v4, v1
3344 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v5, v0
3345 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
3346 ; GFX8-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3347 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
3348 ; GFX8-SDAG-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3349 ; GFX8-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3350 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
3352 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i8:
3353 ; GFX8-GISEL: ; %bb.0: ; %entry
3354 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3355 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
3356 ; GFX8-GISEL-NEXT: v_add_u16_e32 v1, 1, v1
3357 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v0, v2, v0
3358 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v1, v3, v1
3359 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2
3360 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
3361 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v2, 1
3362 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v3, 1
3363 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
3364 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
3365 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v6, v0
3366 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v7, v1
3367 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
3368 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3
3369 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
3371 ; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i8:
3372 ; GFX9-SDAG: ; %bb.0: ; %entry
3373 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3374 ; GFX9-SDAG-NEXT: v_add_u16_e32 v1, 1, v1
3375 ; GFX9-SDAG-NEXT: v_add_u16_e32 v0, 1, v0
3376 ; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v4, v1, v3
3377 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v1, v3, v1
3378 ; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v5, v0, v2
3379 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v2, v0
3380 ; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
3381 ; GFX9-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2
3382 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v3, v1, v4, v1
3383 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v2, v0, v5, v0
3384 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v3, v1, v3
3385 ; GFX9-SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v1
3386 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v2, v0, v2
3387 ; GFX9-SDAG-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3388 ; GFX9-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3389 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
3391 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i8:
3392 ; GFX9-GISEL: ; %bb.0: ; %entry
3393 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3394 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
3395 ; GFX9-GISEL-NEXT: v_add_u16_e32 v1, 1, v1
3396 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v4, v0, v2, v0
3397 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v5, v1, v3, v1
3398 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2
3399 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
3400 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v2, 1
3401 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v3, 1
3402 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v4, v2, 1
3403 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v3, v5, v3, 1
3404 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v6, v0
3405 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v7, v1
3406 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
3407 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v3
3408 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
3410 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i8:
3411 ; GFX10-SDAG: ; %bb.0: ; %entry
3412 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3413 ; GFX10-SDAG-NEXT: v_add_nc_u16 v1, v1, 1
3414 ; GFX10-SDAG-NEXT: v_add_nc_u16 v0, v0, 1
3415 ; GFX10-SDAG-NEXT: v_mad_u16 v4, v1, v3, v1
3416 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v1, v1, v3
3417 ; GFX10-SDAG-NEXT: v_mad_u16 v5, v0, v2, v0
3418 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v0, v0, v2
3419 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v3, v4, v3
3420 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v2, v5, v2
3421 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
3422 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
3423 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v1, v3, v1
3424 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v2, v0
3425 ; GFX10-SDAG-NEXT: v_lshlrev_b16 v2, 8, v1
3426 ; GFX10-SDAG-NEXT: v_and_b32_e32 v1, 0xff, v1
3427 ; GFX10-SDAG-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
3428 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3430 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i8:
3431 ; GFX10-GISEL: ; %bb.0: ; %entry
3432 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3433 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
3434 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
3435 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v4, v0, v2
3436 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v5, v1, v3
3437 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v4, v0
3438 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v5, v1
3439 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
3440 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
3441 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v4, 1
3442 ; GFX10-GISEL-NEXT: v_add_nc_u16 v3, v5, 1
3443 ; GFX10-GISEL-NEXT: v_add_nc_u16 v4, v0, 1
3444 ; GFX10-GISEL-NEXT: v_add_nc_u16 v5, v1, 1
3445 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v2
3446 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v3
3447 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v4
3448 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v5
3449 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test; every operation is element-wise on <2 x i8>:
;   y18    = x + 1
;   add    = y18 * y
;   mul119 = add + y18        (a product plus one of its own factors)
;   add2   = mul119 * y
;   add220 = add + 1
;   add422 = add2 + 1
;   mul521 = add2 * add220    (multiply by an incremented value)
;   add6   = mul521 * add422  (multiply by an incremented value)
; REVIEW: the entry label and the terminator are not visible in this excerpt;
; presumably add6 is the returned value - confirm against the full file.
3451 %y18 = add <2 x i8> %x, <i8 1, i8 1>
3452 %add = mul <2 x i8> %y18, %y
3453 %mul119 = add <2 x i8> %add, %y18
3454 %add2 = mul <2 x i8> %mul119, %y
3455 %add220 = add <2 x i8> %add, <i8 1, i8 1>
3456 %add422 = add <2 x i8> %add2, <i8 1, i8 1>
3457 %mul521 = mul <2 x i8> %add2, %add220
3458 %add6 = mul <2 x i8> %mul521, %add422
; Scalar i64 variant of the clpeak integer multiply-add pattern test.
; The assertions that follow the define line are autogenerated by
; utils/update_llc_test_checks.py (see the note at the top of this file);
; regenerate them with that script rather than editing them by hand.
3462 define i64 @clpeak_imad_pat_i64(i64 %x, i64 %y) {
3463 ; GFX6-SDAG-LABEL: clpeak_imad_pat_i64:
3464 ; GFX6-SDAG: ; %bb.0: ; %entry
3465 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3466 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3467 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v0, v3
3468 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v5, v0, v2
3469 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3470 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v6, v1, v2
3471 ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4
3472 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2
3473 ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v6
3474 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v0
3475 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v0, v3
3476 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v0, v2
3477 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
3478 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
3479 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v1, v2
3480 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4
3481 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v4, v0, v5
3482 ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v6, v3
3483 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3484 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5
3485 ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v4, v2
3486 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v0, v5
3487 ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v3
3488 ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v0
3489 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v2, vcc, v2, v1, vcc
3490 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
3491 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v4, v3, v0
3492 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v0
3493 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v3, v0
3494 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v4, v1
3495 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v5
3496 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v3
3497 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v1, v2, vcc
3498 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
3500 ; GFX6-GISEL-LABEL: clpeak_imad_pat_i64:
3501 ; GFX6-GISEL: ; %bb.0: ; %entry
3502 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3503 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3504 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3505 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v1, v2
3506 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v3
3507 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v7, v0, v2
3508 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v6, v0, v2
3509 ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
3510 ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v7
3511 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v6, v0
3512 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, v4, v1, vcc
3513 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v2
3514 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v0, v3
3515 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v2
3516 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v0, v0, v2
3517 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
3518 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
3519 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v6
3520 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v4, vcc
3521 ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
3522 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v4, vcc, 0, v0, vcc
3523 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
3524 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v5, v2
3525 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v6, v5, v1
3526 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v1, v5, v1
3527 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
3528 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
3529 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v0, v3
3530 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v6, v4
3531 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v6, v3
3532 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v3, v6, v3
3533 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
3534 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
3535 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
3537 ; GFX7-SDAG-LABEL: clpeak_imad_pat_i64:
3538 ; GFX7-SDAG: ; %bb.0: ; %entry
3539 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3540 ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v0
3541 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
3542 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
3543 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
3544 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2
3545 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v6
3546 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v7
3547 ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v0, v4
3548 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc
3549 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
3550 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0
3551 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2
3552 ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v6
3553 ; GFX7-SDAG-NEXT: v_add_i32_e32 v4, vcc, v4, v2
3554 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v4, v0
3555 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4]
3556 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v3, v1
3557 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v6
3558 ; GFX7-SDAG-NEXT: v_add_i32_e32 v6, vcc, v0, v1
3559 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v6, v3
3560 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6]
3561 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v3, v5, v4
3562 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1
3563 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v3, v1
3564 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
3566 ; GFX7-GISEL-LABEL: clpeak_imad_pat_i64:
3567 ; GFX7-GISEL: ; %bb.0: ; %entry
3568 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3569 ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
3570 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
3571 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
3572 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
3573 ; GFX7-GISEL-NEXT: v_add_i32_e32 v8, vcc, v0, v6
3574 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
3575 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
3576 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc
3577 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v6
3578 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
3579 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
3580 ; GFX7-GISEL-NEXT: v_add_i32_e32 v6, vcc, 1, v0
3581 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc
3582 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
3583 ; GFX7-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v5
3584 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v4
3585 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
3586 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
3587 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc
3588 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
3589 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
3590 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
3591 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
3593 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i64:
3594 ; GFX8-SDAG: ; %bb.0: ; %entry
3595 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3596 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, 1, v0
3597 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v5, vcc, 0, v1, vcc
3598 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
3599 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
3600 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2
3601 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v6
3602 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v7
3603 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v0, v4
3604 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v1, v5, vcc
3605 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
3606 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v4, v2, 0
3607 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2
3608 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v6
3609 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v4, v2
3610 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v4, v0
3611 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v3, v0, v[3:4]
3612 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v3, v1
3613 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v6
3614 ; GFX8-SDAG-NEXT: v_add_u32_e32 v6, vcc, v0, v1
3615 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v6, v3
3616 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, v[5:6]
3617 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v5, v4
3618 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1
3619 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v3, v1
3620 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
3622 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i64:
3623 ; GFX8-GISEL: ; %bb.0: ; %entry
3624 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3625 ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0
3626 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v7, vcc, 0, v1, vcc
3627 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
3628 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
3629 ; GFX8-GISEL-NEXT: v_add_u32_e32 v8, vcc, v0, v6
3630 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
3631 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
3632 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v9, vcc, v4, v7, vcc
3633 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v6
3634 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
3635 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
3636 ; GFX8-GISEL-NEXT: v_add_u32_e32 v6, vcc, 1, v0
3637 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc
3638 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
3639 ; GFX8-GISEL-NEXT: v_add_u32_e32 v7, vcc, 1, v5
3640 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v4
3641 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
3642 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
3643 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v2, vcc
3644 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
3645 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
3646 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
3647 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
3649 ; GFX900-SDAG-LABEL: clpeak_imad_pat_i64:
3650 ; GFX900-SDAG: ; %bb.0: ; %entry
3651 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3652 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v4, vcc, 1, v0
3653 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
3654 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2
3655 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3
3656 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
3657 ; GFX900-SDAG-NEXT: v_add3_u32 v6, v1, v7, v6
3658 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v1, vcc, v0, v4
3659 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v4, vcc, v6, v5, vcc
3660 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v4, v2
3661 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v1, v3
3662 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v1, v2, 0
3663 ; GFX900-SDAG-NEXT: v_add3_u32 v2, v2, v3, v4
3664 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v2, v0
3665 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v1, v0, v[1:2]
3666 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v6
3667 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v3, v2
3668 ; GFX900-SDAG-NEXT: v_add3_u32 v4, v5, v4, v0
3669 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v4, v1
3670 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v1, v[3:4]
3671 ; GFX900-SDAG-NEXT: v_add3_u32 v1, v5, v1, v2
3672 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
3674 ; GFX900-GISEL-LABEL: clpeak_imad_pat_i64:
3675 ; GFX900-GISEL: ; %bb.0: ; %entry
3676 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3677 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
3678 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
3679 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
3680 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, v[1:2]
3681 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v8, vcc, v0, v6
3682 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
3683 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v2, 0
3684 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v9, vcc, v4, v7, vcc
3685 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v6
3686 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v3, v[1:2]
3687 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v2, v[6:7]
3688 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
3689 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v4, vcc
3690 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v5, v6, 0
3691 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v7, vcc, 1, v5
3692 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v4
3693 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v1, v[0:1]
3694 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v7, 0
3695 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v2, vcc
3696 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, v[4:5]
3697 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v3, v8, v[1:2]
3698 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v7, v[1:2]
3699 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
3701 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_i64:
3702 ; GFX90A-SDAG: ; %bb.0: ; %entry
3703 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3704 ; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
3705 ; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
3706 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v1, v2
3707 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v0, v3
3708 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v2, 0
3709 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
3710 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v6, v1, v7
3711 ; GFX90A-SDAG-NEXT: v_add3_u32 v5, v5, v7, v6
3712 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v3, v0, v3
3713 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v1, v2
3714 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, 0
3715 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v1, v3, v6
3716 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v1, v4
3717 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v4, v[0:1]
3718 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v0, v5
3719 ; GFX90A-SDAG-NEXT: v_add3_u32 v3, v6, v3, v4
3720 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v2, v1
3721 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v3, v0
3722 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
3723 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v5, v1, v4
3724 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
3726 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_i64:
3727 ; GFX90A-GISEL: ; %bb.0: ; %entry
3728 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3729 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
3730 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v1, vcc
3731 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v3, 0
3732 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v2, 0
3733 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v2, v[4:5]
3734 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v4
3735 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, v0, v6
3736 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, v1, v7, vcc
3737 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v2, 0
3738 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v6, v3, 0
3739 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, v[6:7]
3740 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
3741 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v1, vcc
3742 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, v5, v2
3743 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v7, vcc, 1, v4
3744 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v0, 0
3745 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v5, vcc
3746 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v6, 0
3747 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v6, v[0:1]
3748 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, v3, v0
3749 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v7, 0
3750 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v8, 0
3751 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[2:3]
3752 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2
3753 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
3755 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i64:
3756 ; GFX10-SDAG: ; %bb.0: ; %entry
3757 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3758 ; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1
3759 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
3760 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v4, v3
3761 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
3762 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v5, v2
3763 ; GFX10-SDAG-NEXT: v_add_co_u32 v4, vcc_lo, v0, v4
3764 ; GFX10-SDAG-NEXT: v_add3_u32 v1, v1, v7, v6
3765 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
3766 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v4, v2, 0
3767 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, v1, v5, vcc_lo
3768 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v5, v2
3769 ; GFX10-SDAG-NEXT: v_add3_u32 v4, v4, v6, v2
3770 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v3, v1
3771 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v4, v0
3772 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
3773 ; GFX10-SDAG-NEXT: v_add3_u32 v1, v5, v1, v2
3774 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v4
3775 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3
3776 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
3777 ; GFX10-SDAG-NEXT: v_add3_u32 v1, v4, v1, v2
3778 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
3780 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i64:
3781 ; GFX10-GISEL: ; %bb.0: ; %entry
3782 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3783 ; GFX10-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v0, 1
3784 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v1, vcc_lo
3785 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v4, v2, 0
3786 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v4, v3
3787 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v7, v5, v2
3788 ; GFX10-GISEL-NEXT: v_add3_u32 v1, v1, v6, v7
3789 ; GFX10-GISEL-NEXT: v_add_co_u32 v6, vcc_lo, v0, v4
3790 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v1, v5, vcc_lo
3791 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v6, v2, 0
3792 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v6, v3
3793 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v7, v2
3794 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
3795 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
3796 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
3797 ; GFX10-GISEL-NEXT: v_add3_u32 v5, v5, v3, v2
3798 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v4, v0, 0
3799 ; GFX10-GISEL-NEXT: v_add_co_u32 v4, vcc_lo, v4, 1
3800 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v5, v0
3801 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, 0, v5, vcc_lo
3802 ; GFX10-GISEL-NEXT: v_add3_u32 v3, v3, v1, v0
3803 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v2, v4, 0
3804 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v2, v5
3805 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v3, v4
3806 ; GFX10-GISEL-NEXT: v_add3_u32 v1, v1, v2, v3
3807 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test; every operation is on scalar i64:
;   y18    = x + 1
;   add    = y18 * y
;   mul119 = add + y18        (a product plus one of its own factors)
;   add2   = mul119 * y
;   add220 = add + 1
;   add422 = add2 + 1
;   mul521 = add2 * add220    (multiply by an incremented value)
;   add6   = mul521 * add422  (multiply by an incremented value)
; REVIEW: the entry label and the terminator are not visible in this excerpt;
; presumably add6 is the returned value - confirm against the full file.
3809 %y18 = add i64 %x, 1
3810 %add = mul i64 %y18, %y
3811 %mul119 = add i64 %add, %y18
3812 %add2 = mul i64 %mul119, %y
3813 %add220 = add i64 %add, 1
3814 %add422 = add i64 %add2, 1
3815 %mul521 = mul i64 %add2, %add220
3816 %add6 = mul i64 %mul521, %add422
3820 define <2 x i64> @clpeak_imad_pat_v2i64(<2 x i64> %x, <2 x i64> %y) {
3821 ; GFX6-SDAG-LABEL: clpeak_imad_pat_v2i64:
3822 ; GFX6-SDAG: ; %bb.0: ; %entry
3823 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3824 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3825 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3826 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v8, v0, v5
3827 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v9, v0, v4
3828 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v10, v1, v4
3829 ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3830 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3831 ; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, v9, v8
3832 ; GFX6-SDAG-NEXT: v_add_i32_e32 v8, vcc, v8, v10
3833 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v9, v2, v7
3834 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v10, v2, v6
3835 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v12, v3, v6
3836 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v11, v0, v4
3837 ; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, v10, v9
3838 ; GFX6-SDAG-NEXT: v_add_i32_e32 v9, vcc, v9, v12
3839 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v10, v2, v6
3840 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v11, v0
3841 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
3842 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v5
3843 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v12, v0, v4
3844 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v1, v4
3845 ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v10, v2
3846 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, v9, v3, vcc
3847 ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v12, v5
3848 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v5, v1
3849 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v7
3850 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v7, v2, v6
3851 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v2, v6
3852 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v3, v6
3853 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v4
3854 ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v7, v5
3855 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v2, v9
3856 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v2, v10
3857 ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v3
3858 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v4, v3, v10
3859 ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5
3860 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v6, v0, v11
3861 ; GFX6-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4
3862 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v8
3863 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v8, v1, v11
3864 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v7, v2, v10
3865 ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5
3866 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v6, v0, v11
3867 ; GFX6-SDAG-NEXT: v_add_i32_e32 v5, vcc, v5, v8
3868 ; GFX6-SDAG-NEXT: v_add_i32_e32 v6, vcc, v6, v0
3869 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v5, vcc, v5, v1, vcc
3870 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v1, v6, v1
3871 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v8, v6, v0
3872 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v9, v5, v0
3873 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v6, v0
3874 ; GFX6-SDAG-NEXT: v_add_i32_e32 v7, vcc, v7, v2
3875 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v4, vcc, v4, v3, vcc
3876 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v8, v1
3877 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v9
3878 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v3, v7, v3
3879 ; GFX6-SDAG-NEXT: v_mul_hi_u32 v8, v7, v2
3880 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v6
3881 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
3882 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v4, v2
3883 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v7, v2
3884 ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v8, v3
3885 ; GFX6-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v5
3886 ; GFX6-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v7
3887 ; GFX6-SDAG-NEXT: v_addc_u32_e32 v3, vcc, v3, v4, vcc
3888 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
3890 ; GFX6-GISEL-LABEL: clpeak_imad_pat_v2i64:
3891 ; GFX6-GISEL: ; %bb.0: ; %entry
3892 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3893 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
3894 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
3895 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v8, v1, v4
3896 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v9, v0, v5
3897 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v11, v0, v4
3898 ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v2
3899 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
3900 ; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v9
3901 ; GFX6-GISEL-NEXT: v_add_i32_e32 v8, vcc, v8, v11
3902 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v9, v3, v6
3903 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v11, v2, v7
3904 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v13, v2, v6
3905 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v10, v0, v4
3906 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v12, v2, v6
3907 ; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v11
3908 ; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, v9, v13
3909 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v10, v0
3910 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v1, vcc, v8, v1, vcc
3911 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v4
3912 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v5
3913 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v11, v0, v4
3914 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v0, v0, v4
3915 ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, v12, v2
3916 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, v9, v3, vcc
3917 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
3918 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v0
3919 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v3, v6
3920 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v2, v7
3921 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v2, v6
3922 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v2, v2, v6
3923 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
3924 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
3925 ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v10
3926 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v3, vcc, 0, v8, vcc
3927 ; GFX6-GISEL-NEXT: v_add_i32_e32 v5, vcc, 1, v12
3928 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc
3929 ; GFX6-GISEL-NEXT: v_add_i32_e32 v7, vcc, 1, v11
3930 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v8, vcc, 0, v0, vcc
3931 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
3932 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v11, v3
3933 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v12, v11, v2
3934 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v2, v11, v2
3935 ; GFX6-GISEL-NEXT: v_add_i32_e32 v9, vcc, 1, v4
3936 ; GFX6-GISEL-NEXT: v_addc_u32_e32 v10, vcc, 0, v1, vcc
3937 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v3
3938 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v2
3939 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
3940 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v4, v6
3941 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v3, v4, v5
3942 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v4, v4, v5
3943 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v5, v12, v7
3944 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v2
3945 ; GFX6-GISEL-NEXT: v_add_i32_e32 v2, vcc, v1, v4
3946 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v1, v0, v7
3947 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v12, v8
3948 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v12, v7
3949 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v4
3950 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
3951 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v4, v2, v9
3952 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v3, v10
3953 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v3, v9
3954 ; GFX6-GISEL-NEXT: v_mul_hi_u32 v3, v3, v9
3955 ; GFX6-GISEL-NEXT: v_add_i32_e32 v4, vcc, v4, v5
3956 ; GFX6-GISEL-NEXT: v_add_i32_e32 v3, vcc, v4, v3
3957 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
3959 ; GFX7-SDAG-LABEL: clpeak_imad_pat_v2i64:
3960 ; GFX7-SDAG: ; %bb.0: ; %entry
3961 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
3962 ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, 1, v0
3963 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
3964 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5
3965 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0
3966 ; GFX7-SDAG-NEXT: v_add_i32_e32 v11, vcc, 1, v2
3967 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v12, vcc, 0, v3, vcc
3968 ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v1, v10
3969 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v10, v9, v4
3970 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v13, v11, v7
3971 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0
3972 ; GFX7-SDAG-NEXT: v_add_i32_e32 v14, vcc, v3, v10
3973 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6
3974 ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v13
3975 ; GFX7-SDAG-NEXT: v_add_i32_e32 v13, vcc, v2, v3
3976 ; GFX7-SDAG-NEXT: v_add_i32_e32 v2, vcc, v0, v8
3977 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5
3978 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0
3979 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v9, vcc, v14, v9, vcc
3980 ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, v1, v11
3981 ; GFX7-SDAG-NEXT: v_addc_u32_e32 v10, vcc, v13, v12, vcc
3982 ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v5
3983 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v8, v7
3984 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0
3985 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v9, v9, v4
3986 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v4, v10, v6
3987 ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v8, v5
3988 ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v9
3989 ; GFX7-SDAG-NEXT: v_add_i32_e32 v8, vcc, v5, v4
3990 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v8, v1
3991 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8]
3992 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v1, v3, v0
3993 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3]
3994 ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5
3995 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v7, v13
3996 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v2, v14
3997 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v10
3998 ; GFX7-SDAG-NEXT: v_add_i32_e32 v5, vcc, v6, v5
3999 ; GFX7-SDAG-NEXT: v_add_i32_e32 v10, vcc, v0, v1
4000 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v11, v10, v2
4001 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10]
4002 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v6, v9, v3
4003 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v9, v5, v7
4004 ; GFX7-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5]
4005 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v4, v4, v8
4006 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v11, v1
4007 ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v9, v3
4008 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, v6, v1
4009 ; GFX7-SDAG-NEXT: v_add_i32_e32 v3, vcc, v4, v3
4010 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
4012 ; GFX7-GISEL-LABEL: clpeak_imad_pat_v2i64:
4013 ; GFX7-GISEL: ; %bb.0: ; %entry
4014 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4015 ; GFX7-GISEL-NEXT: v_add_i32_e32 v12, vcc, 1, v0
4016 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
4017 ; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v2
4018 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
4019 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
4020 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
4021 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
4022 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3
4023 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
4024 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
4025 ; GFX7-GISEL-NEXT: v_add_i32_e32 v3, vcc, v0, v12
4026 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
4027 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v8, v13, vcc
4028 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
4029 ; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, v2, v14
4030 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v6, 0
4031 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v11
4032 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v19, vcc, v9, v15, vcc
4033 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v3, v5, v[1:2]
4034 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v13
4035 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v16, v7, v[1:2]
4036 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v18, v4, v[14:15]
4037 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v6, v[16:17]
4038 ; GFX7-GISEL-NEXT: v_add_i32_e32 v11, vcc, 1, v0
4039 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
4040 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc
4041 ; GFX7-GISEL-NEXT: v_add_i32_e32 v13, vcc, 1, v2
4042 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v6
4043 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
4044 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v13, 0
4045 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc
4046 ; GFX7-GISEL-NEXT: v_add_i32_e32 v14, vcc, 1, v10
4047 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
4048 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, v7
4049 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
4050 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
4051 ; GFX7-GISEL-NEXT: v_add_i32_e32 v16, vcc, 1, v12
4052 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
4053 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v13, v[2:3]
4054 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
4055 ; GFX7-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc
4056 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
4057 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v3
4058 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
4059 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
4060 ; GFX7-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
4061 ; GFX7-GISEL-NEXT: v_mov_b32_e32 v1, v5
4062 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
4064 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i64:
4065 ; GFX8-SDAG: ; %bb.0: ; %entry
4066 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4067 ; GFX8-SDAG-NEXT: v_add_u32_e32 v8, vcc, 1, v0
4068 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v9, vcc, 0, v1, vcc
4069 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5
4070 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0
4071 ; GFX8-SDAG-NEXT: v_add_u32_e32 v11, vcc, 1, v2
4072 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v12, vcc, 0, v3, vcc
4073 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v1, v10
4074 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v10, v9, v4
4075 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v13, v11, v7
4076 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v11, v6, 0
4077 ; GFX8-SDAG-NEXT: v_add_u32_e32 v14, vcc, v3, v10
4078 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v12, v6
4079 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v13
4080 ; GFX8-SDAG-NEXT: v_add_u32_e32 v13, vcc, v2, v3
4081 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v0, v8
4082 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v2, v5
4083 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v4, 0
4084 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v9, vcc, v14, v9, vcc
4085 ; GFX8-SDAG-NEXT: v_add_u32_e32 v8, vcc, v1, v11
4086 ; GFX8-SDAG-NEXT: v_addc_u32_e32 v10, vcc, v13, v12, vcc
4087 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v5
4088 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v8, v7
4089 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v8, v6, 0
4090 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v9, v4
4091 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v10, v6
4092 ; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v8, v5
4093 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v9
4094 ; GFX8-SDAG-NEXT: v_add_u32_e32 v8, vcc, v5, v4
4095 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v8, v1
4096 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v1, v[7:8]
4097 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v0
4098 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v2, v0, v[2:3]
4099 ; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v6, v5
4100 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v7, v13
4101 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v14
4102 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v10
4103 ; GFX8-SDAG-NEXT: v_add_u32_e32 v5, vcc, v6, v5
4104 ; GFX8-SDAG-NEXT: v_add_u32_e32 v10, vcc, v0, v1
4105 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v11, v10, v2
4106 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, v[9:10]
4107 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v6, v9, v3
4108 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v9, v5, v7
4109 ; GFX8-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v7, v[4:5]
4110 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v4, v8
4111 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v11, v1
4112 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v9, v3
4113 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v6, v1
4114 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v4, v3
4115 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
4117 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i64:
4118 ; GFX8-GISEL: ; %bb.0: ; %entry
4119 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4120 ; GFX8-GISEL-NEXT: v_add_u32_e32 v12, vcc, 1, v0
4121 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v13, vcc, 0, v1, vcc
4122 ; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v2
4123 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
4124 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
4125 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
4126 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
4127 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3
4128 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
4129 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
4130 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, v0, v12
4131 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
4132 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v18, vcc, v8, v13, vcc
4133 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
4134 ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, v2, v14
4135 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v6, 0
4136 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v11
4137 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v19, vcc, v9, v15, vcc
4138 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v3, v5, v[1:2]
4139 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v13
4140 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v16, v7, v[1:2]
4141 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v18, v4, v[14:15]
4142 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v6, v[16:17]
4143 ; GFX8-GISEL-NEXT: v_add_u32_e32 v11, vcc, 1, v0
4144 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
4145 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v1, vcc, 0, v8, vcc
4146 ; GFX8-GISEL-NEXT: v_add_u32_e32 v13, vcc, 1, v2
4147 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v6
4148 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
4149 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v13, 0
4150 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v2, vcc, 0, v9, vcc
4151 ; GFX8-GISEL-NEXT: v_add_u32_e32 v14, vcc, 1, v10
4152 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
4153 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v7
4154 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v15, vcc, 0, v3, vcc
4155 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
4156 ; GFX8-GISEL-NEXT: v_add_u32_e32 v16, vcc, 1, v12
4157 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
4158 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v13, v[2:3]
4159 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
4160 ; GFX8-GISEL-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc
4161 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
4162 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v3
4163 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
4164 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
4165 ; GFX8-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
4166 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v5
4167 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
4169 ; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i64:
4170 ; GFX900-SDAG: ; %bb.0: ; %entry
4171 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4172 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v8, vcc, 1, v2
4173 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v3, vcc
4174 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v10, vcc, 1, v0
4175 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v1, vcc
4176 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v12, v11, v4
4177 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v13, v10, v5
4178 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0
4179 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v14, v9, v6
4180 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v15, v8, v7
4181 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0
4182 ; GFX900-SDAG-NEXT: v_add3_u32 v12, v1, v13, v12
4183 ; GFX900-SDAG-NEXT: v_add3_u32 v1, v3, v15, v14
4184 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v8, vcc, v2, v8
4185 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v9, vcc, v1, v9, vcc
4186 ; GFX900-SDAG-NEXT: v_add_co_u32_e32 v3, vcc, v0, v10
4187 ; GFX900-SDAG-NEXT: v_addc_co_u32_e32 v10, vcc, v12, v11, vcc
4188 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v10, v4
4189 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v3, v5
4190 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v3, v4, 0
4191 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v9, v6
4192 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v7, v8, v7
4193 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v6, 0
4194 ; GFX900-SDAG-NEXT: v_add3_u32 v4, v4, v11, v10
4195 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v4, v0
4196 ; GFX900-SDAG-NEXT: v_add3_u32 v6, v6, v7, v9
4197 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v6, v2
4198 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v5, v2, v[5:6]
4199 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v5, v1
4200 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
4201 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v11, v3, v12
4202 ; GFX900-SDAG-NEXT: v_add3_u32 v8, v9, v8, v2
4203 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v4
4204 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v6, v7, v6
4205 ; GFX900-SDAG-NEXT: v_add3_u32 v1, v10, v1, v11
4206 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v9, v1, v3
4207 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v3, v[0:1]
4208 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5
4209 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v5, v[7:8]
4210 ; GFX900-SDAG-NEXT: v_add3_u32 v1, v9, v1, v4
4211 ; GFX900-SDAG-NEXT: v_add3_u32 v3, v10, v3, v6
4212 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4214 ; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i64:
4215 ; GFX900-GISEL: ; %bb.0: ; %entry
4216 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4217 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v0
4218 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v1, vcc
4219 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v2
4220 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v12, v4, 0
4221 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
4222 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v14, v6, 0
4223 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v5, v[1:2]
4224 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3
4225 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v14, v7, v[1:2]
4226 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v4, v[8:9]
4227 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v3, vcc, v0, v12
4228 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v15, v6, v[10:11]
4229 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v18, vcc, v8, v13, vcc
4230 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v3, v4, 0
4231 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, v2, v14
4232 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[12:13], s[4:5], v16, v6, 0
4233 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v11
4234 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v19, vcc, v9, v15, vcc
4235 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[14:15], s[4:5], v3, v5, v[1:2]
4236 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v13
4237 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[16:17], s[4:5], v16, v7, v[1:2]
4238 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v18, v4, v[14:15]
4239 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v19, v6, v[16:17]
4240 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v11, vcc, 1, v0
4241 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v11, 0
4242 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v8, vcc
4243 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v13, vcc, 1, v2
4244 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v6
4245 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v1, v[0:1]
4246 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v13, 0
4247 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v2, vcc, 0, v9, vcc
4248 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v10
4249 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v3, v11, v[0:1]
4250 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v0, v7
4251 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v3, vcc
4252 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, v[0:1]
4253 ; GFX900-GISEL-NEXT: v_add_co_u32_e32 v16, vcc, 1, v12
4254 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v14, 0
4255 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v4, v13, v[2:3]
4256 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v16, 0
4257 ; GFX900-GISEL-NEXT: v_addc_co_u32_e32 v17, vcc, 0, v4, vcc
4258 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v5, v15, v[1:2]
4259 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v3
4260 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v6, v17, v[1:2]
4261 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v14, v[4:5]
4262 ; GFX900-GISEL-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v9, v16, v[10:11]
4263 ; GFX900-GISEL-NEXT: v_mov_b32_e32 v1, v5
4264 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4266 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i64:
4267 ; GFX90A-SDAG: ; %bb.0: ; %entry
4268 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4269 ; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
4270 ; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
4271 ; GFX90A-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 1, v0
4272 ; GFX90A-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
4273 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v4
4274 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v13, v0, v5
4275 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v0, v4, 0
4276 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, v[0:1]
4277 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v14, v3, v6
4278 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v15, v2, v7
4279 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v2, v6, 0
4280 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v6, v[2:3]
4281 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v12, v1, v13
4282 ; GFX90A-SDAG-NEXT: v_add3_u32 v9, v9, v13, v12
4283 ; GFX90A-SDAG-NEXT: v_add3_u32 v3, v14, v3, v15
4284 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v0, v5
4285 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v12, v1, v4
4286 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v4, 0
4287 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v1, v5, v12
4288 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v2, v7
4289 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v3, v6
4290 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v6, 0
4291 ; GFX90A-SDAG-NEXT: v_add3_u32 v11, v11, v15, v14
4292 ; GFX90A-SDAG-NEXT: v_add3_u32 v3, v3, v4, v5
4293 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v3, v10
4294 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v10, v[2:3]
4295 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v2, v11
4296 ; GFX90A-SDAG-NEXT: v_add3_u32 v5, v6, v5, v7
4297 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v10, v1, v8
4298 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v8, v[0:1]
4299 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v0, v9
4300 ; GFX90A-SDAG-NEXT: v_add3_u32 v7, v10, v7, v8
4301 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v8, v6, v1
4302 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v9, v7, v0
4303 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
4304 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v4, v3
4305 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v7, v5, v2
4306 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
4307 ; GFX90A-SDAG-NEXT: v_add3_u32 v1, v9, v1, v8
4308 ; GFX90A-SDAG-NEXT: v_add3_u32 v3, v7, v3, v6
4309 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
4311 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i64:
4312 ; GFX90A-GISEL: ; %bb.0: ; %entry
4313 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4314 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 1, v0
4315 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v1, vcc
4316 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v2
4317 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v3, vcc
4318 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v5, 0
4319 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0
4320 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v4, v[2:3]
4321 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2
4322 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v12, v7, 0
4323 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, v0, v10
4324 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v6, 0
4325 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v13, v6, v[8:9]
4326 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v14, vcc, v1, v11, vcc
4327 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v3, v8
4328 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, v2, v12
4329 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[8:9], s[4:5], v10, v4, 0
4330 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v10, v5, 0
4331 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, v3, v13, vcc
4332 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v14, v4, v[10:11]
4333 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[10:11], s[4:5], v12, v7, 0
4334 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v9, v9, v4
4335 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v6, 0
4336 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v13, v6, v[10:11]
4337 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, v5, v6
4338 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v6, vcc, 1, v0
4339 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v1, vcc
4340 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v10, vcc, 1, v2
4341 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v3, vcc
4342 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0
4343 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v12, vcc, 1, v8
4344 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v6, 0
4345 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v6, v[0:1]
4346 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v9, vcc
4347 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v8, v3, v0
4348 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v11, 0
4349 ; GFX90A-GISEL-NEXT: v_add_co_u32_e32 v14, vcc, 1, v4
4350 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v4, v10, 0
4351 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v10, v[0:1]
4352 ; GFX90A-GISEL-NEXT: v_addc_co_u32_e32 v15, vcc, 0, v5, vcc
4353 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v7, v7, v0
4354 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v12, 0
4355 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v2, v13, 0
4356 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v12, v[2:3]
4357 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v15, 0
4358 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v1, v2
4359 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, v14, 0
4360 ; GFX90A-GISEL-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v7, v14, v[4:5]
4361 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v3, v4
4362 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
4364 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i64:
4365 ; GFX10-SDAG: ; %bb.0: ; %entry
4366 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4367 ; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1
4368 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
4369 ; GFX10-SDAG-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1
4370 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
4371 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v12, v9, v4
4372 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v13, v8, v5
4373 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0
4374 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v14, v11, v6
4375 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v15, v10, v7
4376 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v10, v6, 0
4377 ; GFX10-SDAG-NEXT: v_add3_u32 v12, v1, v13, v12
4378 ; GFX10-SDAG-NEXT: v_add_co_u32 v1, vcc_lo, v0, v8
4379 ; GFX10-SDAG-NEXT: v_add3_u32 v13, v3, v15, v14
4380 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v12, v9, vcc_lo
4381 ; GFX10-SDAG-NEXT: v_add_co_u32 v8, vcc_lo, v2, v10
4382 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, v13, v11, vcc_lo
4383 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v3, v4
4384 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v1, v5
4385 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v7, v8, v7
4386 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v9, v6
4387 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[5:6], null, v8, v6, 0
4388 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v1, v4, 0
4389 ; GFX10-SDAG-NEXT: v_add3_u32 v6, v6, v7, v9
4390 ; GFX10-SDAG-NEXT: v_add3_u32 v4, v4, v11, v10
4391 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v11, v3, v12
4392 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v6, v2
4393 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[7:8], null, v5, v2, v[5:6]
4394 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v4, v0
4395 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
4396 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v5, v13
4397 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v6, v7, v6
4398 ; GFX10-SDAG-NEXT: v_add3_u32 v1, v10, v1, v11
4399 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v4
4400 ; GFX10-SDAG-NEXT: v_add3_u32 v8, v9, v8, v2
4401 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v9, v1, v3
4402 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v0, v3, v[0:1]
4403 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v10, v8, v5
4404 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[2:3], null, v7, v5, v[7:8]
4405 ; GFX10-SDAG-NEXT: v_add3_u32 v1, v9, v1, v4
4406 ; GFX10-SDAG-NEXT: v_add3_u32 v3, v10, v3, v6
4407 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
4409 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i64:
4410 ; GFX10-GISEL: ; %bb.0: ; %entry
4411 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4412 ; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v0, 1
4413 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v1, vcc_lo
4414 ; GFX10-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v2, 1
4415 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v3, vcc_lo
4416 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v8, v4, 0
4417 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v12, v8, v5
4418 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v13, v9, v4
4419 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v10, v6, 0
4420 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v14, v10, v7
4421 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v15, v11, v6
4422 ; GFX10-GISEL-NEXT: v_add3_u32 v1, v1, v12, v13
4423 ; GFX10-GISEL-NEXT: v_add3_u32 v12, v3, v14, v15
4424 ; GFX10-GISEL-NEXT: v_add_co_u32 v3, vcc_lo, v0, v8
4425 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v13, vcc_lo, v1, v9, vcc_lo
4426 ; GFX10-GISEL-NEXT: v_add_co_u32 v10, vcc_lo, v2, v10
4427 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, v12, v11, vcc_lo
4428 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[8:9], null, v3, v4, 0
4429 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v3, v5
4430 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v13, v13, v4
4431 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[3:4], null, v10, v6, 0
4432 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v7, v10, v7
4433 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v11, v6
4434 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
4435 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
4436 ; GFX10-GISEL-NEXT: v_add_co_u32 v2, vcc_lo, v2, 1
4437 ; GFX10-GISEL-NEXT: v_add3_u32 v9, v9, v5, v13
4438 ; GFX10-GISEL-NEXT: v_add3_u32 v10, v4, v7, v6
4439 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v11, vcc_lo, 0, v12, vcc_lo
4440 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[4:5], null, v8, v0, 0
4441 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v8, v1
4442 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v9, v0
4443 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[6:7], null, v3, v2, 0
4444 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v11, v3, v11
4445 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v10, v2
4446 ; GFX10-GISEL-NEXT: v_add_co_u32 v8, vcc_lo, v8, 1
4447 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v9, vcc_lo
4448 ; GFX10-GISEL-NEXT: v_add_co_u32 v12, vcc_lo, v3, 1
4449 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v10, vcc_lo, 0, v10, vcc_lo
4450 ; GFX10-GISEL-NEXT: v_add3_u32 v3, v5, v1, v0
4451 ; GFX10-GISEL-NEXT: v_add3_u32 v5, v7, v11, v2
4452 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[0:1], null, v4, v8, 0
4453 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v4, v9
4454 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v7, v3, v8
4455 ; GFX10-GISEL-NEXT: v_mad_u64_u32 v[2:3], null, v6, v12, 0
4456 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v6, v6, v10
4457 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v5, v12
4458 ; GFX10-GISEL-NEXT: v_add3_u32 v1, v1, v4, v7
4459 ; GFX10-GISEL-NEXT: v_add3_u32 v3, v3, v6, v5
4460 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
4462 %y18 = add <2 x i64> %x, <i64 1, i64 1>
4463 %add = mul <2 x i64> %y18, %y
4464 %mul119 = add <2 x i64> %add, %y18
4465 %add2 = mul <2 x i64> %mul119, %y
4466 %add220 = add <2 x i64> %add, <i64 1, i64 1>
4467 %add422 = add <2 x i64> %add2, <i64 1, i64 1>
4468 %mul521 = mul <2 x i64> %add2, %add220
4469 %add6 = mul <2 x i64> %mul521, %add422
; Add/mul chain in which every multiply result (%i3, %i5 and %i7) is also
; written out through a volatile store to %ptr, so each product has a user
; other than the add that consumes it.
; In the checked output below, every listed target keeps separate v_mul_lo_u32
; and add instructions; no mad instruction is expected for this function.
; %arg2 is not referenced by any instruction shown in this function.
4473 define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) {
4474 ; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
4475 ; GFX6-SDAG: ; %bb.0: ; %bb
4476 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4477 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4478 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4479 ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
4480 ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
4481 ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
4482 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4483 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4484 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4485 ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
4486 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1
4487 ; GFX6-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4488 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
4489 ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4490 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
4491 ; GFX6-SDAG-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
4492 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(1)
4493 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v1
4494 ; GFX6-SDAG-NEXT: s_waitcnt expcnt(0)
4495 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
4497 ; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
4498 ; GFX6-GISEL: ; %bb.0: ; %bb
4499 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4500 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4501 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4502 ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
4503 ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
4504 ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
4505 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4506 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4507 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4508 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1
4509 ; GFX6-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4510 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
4511 ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4512 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
4513 ; GFX6-GISEL-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
4514 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(1)
4515 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v1
4516 ; GFX6-GISEL-NEXT: s_waitcnt expcnt(0)
4517 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
4519 ; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
4520 ; GFX7-SDAG: ; %bb.0: ; %bb
4521 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4522 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4523 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4524 ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
4525 ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
4526 ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
4527 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4528 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4529 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4530 ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
4531 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v5, v0, v1
4532 ; GFX7-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4533 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
4534 ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4535 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
4536 ; GFX7-SDAG-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
4537 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
4538 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v1
4539 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
4541 ; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
4542 ; GFX7-GISEL: ; %bb.0: ; %bb
4543 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4544 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4545 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4546 ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
4547 ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
4548 ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
4549 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4550 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4551 ; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4552 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v5, v0, v1
4553 ; GFX7-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4554 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
4555 ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4556 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
4557 ; GFX7-GISEL-NEXT: buffer_store_dword v5, v[3:4], s[4:7], 0 addr64
4558 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
4559 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v5, v1
4560 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
4562 ; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_all:
4563 ; GFX8: ; %bb.0: ; %bb
4564 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4565 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
4566 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
4567 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
4568 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
4569 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4570 ; GFX8-NEXT: v_mul_lo_u32 v5, v0, v1
4571 ; GFX8-NEXT: flat_store_dword v[3:4], v2
4572 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4573 ; GFX8-NEXT: flat_store_dword v[3:4], v0
4574 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4575 ; GFX8-NEXT: flat_store_dword v[3:4], v5
4576 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4577 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v5, v1
4578 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4580 ; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_all:
4581 ; GFX900: ; %bb.0: ; %bb
4582 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4583 ; GFX900-NEXT: v_add_u32_e32 v0, 1, v0
4584 ; GFX900-NEXT: v_mul_lo_u32 v2, v0, v1
4585 ; GFX900-NEXT: v_add_u32_e32 v0, v2, v0
4586 ; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1
4587 ; GFX900-NEXT: v_add_u32_e32 v1, 1, v2
4588 ; GFX900-NEXT: v_mul_lo_u32 v5, v0, v1
4589 ; GFX900-NEXT: global_store_dword v[3:4], v2, off
4590 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4591 ; GFX900-NEXT: global_store_dword v[3:4], v0, off
4592 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4593 ; GFX900-NEXT: global_store_dword v[3:4], v5, off
4594 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4595 ; GFX900-NEXT: v_add_u32_e32 v0, v5, v1
4596 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4598 ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
4599 ; GFX90A-SDAG: ; %bb.0: ; %bb
4600 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4601 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
4602 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4603 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
4604 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v5, v4
4605 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v4, v3
4606 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4607 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4608 ; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v2, off
4609 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
4610 ; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off
4611 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
4612 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4613 ; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off
4614 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
4615 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v0, v1
4616 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
4618 ; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
4619 ; GFX90A-GISEL: ; %bb.0: ; %bb
4620 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4621 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
4622 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v3
4623 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v4
4624 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1
4625 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
4626 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4627 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v4
4628 ; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v4, off
4629 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
4630 ; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off
4631 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
4632 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4633 ; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off
4634 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
4635 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v0, v1
4636 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
4638 ; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_all:
4639 ; GFX10: ; %bb.0: ; %bb
4640 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4641 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
4642 ; GFX10-NEXT: v_mul_lo_u32 v2, v0, v1
4643 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
4644 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
4645 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v2
4646 ; GFX10-NEXT: v_mul_lo_u32 v5, v1, v0
4647 ; GFX10-NEXT: global_store_dword v[3:4], v2, off
4648 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4649 ; GFX10-NEXT: global_store_dword v[3:4], v1, off
4650 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4651 ; GFX10-NEXT: global_store_dword v[3:4], v5, off
4652 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4653 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v0
4654 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
4656 %i = add i32 %arg, 1
; First product: used by the volatile store below and by %i4 and %i6.
4657 %i3 = mul i32 %i, %arg1
4658 store volatile i32 %i3, ptr addrspace(1) %ptr
4659 %i4 = add i32 %i3, %i
; Second product: used by the volatile store below and by %i7.
4660 %i5 = mul i32 %i4, %arg1
4661 store volatile i32 %i5, ptr addrspace(1) %ptr
4662 %i6 = add i32 %i3, 1
; Third product: used by the volatile store below and by the final add %i8.
4663 %i7 = mul i32 %i5, %i6
4664 store volatile i32 %i7, ptr addrspace(1) %ptr
4665 %i8 = add i32 %i7, %i6
; Add/mul chain with the same arithmetic as
; @v_multi_use_mul_chain_add_other_use_all, except that only two of the three
; multiply results (%i3 and %i7) are written out through volatile stores to
; %ptr; the middle product %i5 is consumed only by %i7.
; In the checked output below, every listed target keeps separate v_mul_lo_u32
; and add instructions; no mad instruction is expected for this function.
; %arg2 is not referenced by any instruction shown in this function.
4669 define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %ptr) {
4670 ; GFX6-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
4671 ; GFX6-SDAG: ; %bb.0: ; %bb
4672 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4673 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4674 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4675 ; GFX6-SDAG-NEXT: s_mov_b32 s6, 0
4676 ; GFX6-SDAG-NEXT: s_mov_b32 s7, 0xf000
4677 ; GFX6-SDAG-NEXT: s_mov_b32 s4, s6
4678 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4679 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4680 ; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4681 ; GFX6-SDAG-NEXT: s_mov_b32 s5, s6
4682 ; GFX6-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4683 ; GFX6-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4684 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0)
4685 ; GFX6-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4686 ; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0)
4687 ; GFX6-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
4688 ; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31]
4690 ; GFX6-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
4691 ; GFX6-GISEL: ; %bb.0: ; %bb
4692 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4693 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4694 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4695 ; GFX6-GISEL-NEXT: s_mov_b32 s6, 0
4696 ; GFX6-GISEL-NEXT: s_mov_b32 s7, 0xf000
4697 ; GFX6-GISEL-NEXT: s_mov_b64 s[4:5], 0
4698 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4699 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4700 ; GFX6-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4701 ; GFX6-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4702 ; GFX6-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4703 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0)
4704 ; GFX6-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4705 ; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0)
4706 ; GFX6-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
4707 ; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31]
4709 ; GFX7-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
4710 ; GFX7-SDAG: ; %bb.0: ; %bb
4711 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4712 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4713 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4714 ; GFX7-SDAG-NEXT: s_mov_b32 s6, 0
4715 ; GFX7-SDAG-NEXT: s_mov_b32 s7, 0xf000
4716 ; GFX7-SDAG-NEXT: s_mov_b32 s4, s6
4717 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4718 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4719 ; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4720 ; GFX7-SDAG-NEXT: s_mov_b32 s5, s6
4721 ; GFX7-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4722 ; GFX7-SDAG-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4723 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
4724 ; GFX7-SDAG-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4725 ; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0)
4726 ; GFX7-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
4727 ; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31]
4729 ; GFX7-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
4730 ; GFX7-GISEL: ; %bb.0: ; %bb
4731 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4732 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4733 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4734 ; GFX7-GISEL-NEXT: s_mov_b32 s6, 0
4735 ; GFX7-GISEL-NEXT: s_mov_b32 s7, 0xf000
4736 ; GFX7-GISEL-NEXT: s_mov_b64 s[4:5], 0
4737 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4738 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4739 ; GFX7-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4740 ; GFX7-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4741 ; GFX7-GISEL-NEXT: buffer_store_dword v2, v[3:4], s[4:7], 0 addr64
4742 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
4743 ; GFX7-GISEL-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
4744 ; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0)
4745 ; GFX7-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v1
4746 ; GFX7-GISEL-NEXT: s_setpc_b64 s[30:31]
4748 ; GFX8-LABEL: v_multi_use_mul_chain_add_other_use_some:
4749 ; GFX8: ; %bb.0: ; %bb
4750 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4751 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 1, v0
4752 ; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1
4753 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v2, v0
4754 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
4755 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4756 ; GFX8-NEXT: v_mul_lo_u32 v0, v0, v1
4757 ; GFX8-NEXT: flat_store_dword v[3:4], v2
4758 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4759 ; GFX8-NEXT: flat_store_dword v[3:4], v0
4760 ; GFX8-NEXT: s_waitcnt vmcnt(0)
4761 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1
4762 ; GFX8-NEXT: s_setpc_b64 s[30:31]
4764 ; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_some:
4765 ; GFX900: ; %bb.0: ; %bb
4766 ; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4767 ; GFX900-NEXT: v_add_u32_e32 v0, 1, v0
4768 ; GFX900-NEXT: v_mul_lo_u32 v2, v0, v1
4769 ; GFX900-NEXT: v_add_u32_e32 v0, v2, v0
4770 ; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1
4771 ; GFX900-NEXT: v_add_u32_e32 v1, 1, v2
4772 ; GFX900-NEXT: v_mul_lo_u32 v0, v0, v1
4773 ; GFX900-NEXT: global_store_dword v[3:4], v2, off
4774 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4775 ; GFX900-NEXT: global_store_dword v[3:4], v0, off
4776 ; GFX900-NEXT: s_waitcnt vmcnt(0)
4777 ; GFX900-NEXT: v_add_u32_e32 v0, v0, v1
4778 ; GFX900-NEXT: s_setpc_b64 s[30:31]
4780 ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
4781 ; GFX90A-SDAG: ; %bb.0: ; %bb
4782 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4783 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
4784 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4785 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
4786 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4787 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4788 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v5, v4
4789 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v4, v3
4790 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4791 ; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v2, off
4792 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
4793 ; GFX90A-SDAG-NEXT: global_store_dword v[4:5], v0, off
4794 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0)
4795 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v0, v1
4796 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
4798 ; GFX90A-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
4799 ; GFX90A-GISEL: ; %bb.0: ; %bb
4800 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4801 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
4802 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, v3
4803 ; GFX90A-GISEL-NEXT: v_mov_b32_e32 v3, v4
4804 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v1
4805 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
4806 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4807 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v4
4808 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4809 ; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v4, off
4810 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
4811 ; GFX90A-GISEL-NEXT: global_store_dword v[2:3], v0, off
4812 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0)
4813 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v0, v1
4814 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
4816 ; GFX10-LABEL: v_multi_use_mul_chain_add_other_use_some:
4817 ; GFX10: ; %bb.0: ; %bb
4818 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4819 ; GFX10-NEXT: v_add_nc_u32_e32 v0, 1, v0
4820 ; GFX10-NEXT: v_mul_lo_u32 v2, v0, v1
4821 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v2, v0
4822 ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
4823 ; GFX10-NEXT: v_add_nc_u32_e32 v1, 1, v2
4824 ; GFX10-NEXT: v_mul_lo_u32 v5, v0, v1
4825 ; GFX10-NEXT: global_store_dword v[3:4], v2, off
4826 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4827 ; GFX10-NEXT: global_store_dword v[3:4], v5, off
4828 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
4829 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v5, v1
4830 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
4832 %i = add i32 %arg, 1
; First product: used by the volatile store below and by %i4 and %i6.
4833 %i3 = mul i32 %i, %arg1
4834 store volatile i32 %i3, ptr addrspace(1) %ptr
4835 %i4 = add i32 %i3, %i
; Second product: not stored; its only user here is %i7.
4836 %i5 = mul i32 %i4, %arg1
4837 %i6 = add i32 %i3, 1
; Third product: used by the volatile store below and by the final add %i8.
4838 %i7 = mul i32 %i5, %i6
4839 store volatile i32 %i7, ptr addrspace(1) %ptr
4840 %i8 = add i32 %i7, %i6
; Scalar i32 chain of alternating add and mul operations on %x and %y with no
; memory accesses.
; In the checked output below, the SelectionDAG runs for gfx900, gfx90a and
; gfx1030 end with two v_mad_u64_u32 instructions. The GlobalISel runs for
; those targets, and both selectors on gfx600/gfx700/gfx803, use only
; v_mul_lo_u32 and add instructions.
4844 define i32 @clpeak_imad_pat_i32_x2(i32 %x, i32 %y) {
4845 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i32_x2:
4846 ; GFX67-SDAG: ; %bb.0: ; %entry
4847 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4848 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4849 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4850 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4851 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4852 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4853 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4854 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1
4855 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4856 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4857 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4858 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v2, v1
4859 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4860 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
4861 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v0
4862 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4863 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v1
4864 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
4866 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i32_x2:
4867 ; GFX67-GISEL: ; %bb.0: ; %entry
4868 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4869 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4870 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4871 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v0
4872 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4873 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4874 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4875 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
4876 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4877 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4878 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4879 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v2, v1
4880 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4881 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v2
4882 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
4883 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
4884 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4885 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
4887 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i32_x2:
4888 ; GFX8-SDAG: ; %bb.0: ; %entry
4889 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4890 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
4891 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4892 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v2, v0
4893 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4894 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4895 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4896 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1
4897 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4898 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4899 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4900 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v2, v1
4901 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4902 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v0, v2
4903 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v0
4904 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4905 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v1
4906 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
4908 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i32_x2:
4909 ; GFX8-GISEL: ; %bb.0: ; %entry
4910 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4911 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
4912 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4913 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v2, v0
4914 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4915 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4916 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4917 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v2, v1
4918 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4919 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4920 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4921 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v2, v1
4922 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4923 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v2
4924 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
4925 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
4926 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4927 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
4929 ; GFX900-SDAG-LABEL: clpeak_imad_pat_i32_x2:
4930 ; GFX900-SDAG: ; %bb.0: ; %entry
4931 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4932 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
4933 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4934 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
4935 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4936 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4937 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4938 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v2, v1
4939 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4940 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4941 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4942 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v2, v1
4943 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4944 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v0, v2, v[0:1]
4945 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v1, v0, v[1:2]
4946 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
4948 ; GFX900-GISEL-LABEL: clpeak_imad_pat_i32_x2:
4949 ; GFX900-GISEL: ; %bb.0: ; %entry
4950 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4951 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
4952 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4953 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
4954 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4955 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
4956 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4957 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v2, v1
4958 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4959 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
4960 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4961 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v2, v1
4962 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4963 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
4964 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
4965 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
4966 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4967 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
4969 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_i32_x2:
4970 ; GFX90A-SDAG: ; %bb.0: ; %entry
4971 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4972 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
4973 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4974 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v2, v0
4975 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
4976 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4977 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4978 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v2, v1
4979 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4980 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v2
4981 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
4982 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v2, v1
4983 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
4984 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v0, v2, v[0:1]
4985 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v2, v0, v[2:3]
4986 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
4988 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_i32_x2:
4989 ; GFX90A-GISEL: ; %bb.0: ; %entry
4990 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4991 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
4992 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4993 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v2, v0
4994 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
4995 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
4996 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
4997 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v2, v1
4998 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
4999 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
5000 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
5001 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v2, v1
5002 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
5003 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v2
5004 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v0
5005 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
5006 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5007 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
5009 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i32_x2:
5010 ; GFX10-SDAG: ; %bb.0: ; %entry
5011 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5012 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
5013 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
5014 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v2, v0
5015 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v1
5016 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2
5017 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
5018 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1
5019 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
5020 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v2
5021 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v0, v1
5022 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v2, v1
5023 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v1, v0
5024 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v0, v2, v[0:1]
5025 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v1, v0, v[1:2]
5026 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5028 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i32_x2:
5029 ; GFX10-GISEL: ; %bb.0: ; %entry
5030 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5031 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
5032 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
5033 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v2, v0
5034 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v1
5035 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
5036 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
5037 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1
5038 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
5039 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
5040 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v1
5041 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v2, v1
5042 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
5043 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v2
5044 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v0, v1
5045 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
5046 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v1, v0
5047 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated by
; utils/update_llc_test_checks.py; regenerate them instead of hand-editing.
5049 %y38 = add i32 %x, 1
5050 %add = mul i32 %y38, %y
5051 %mul139 = add i32 %add, %y38
5052 %add2 = mul i32 %mul139, %y
5053 %add240 = add i32 %add, 1
5054 %add4 = mul i32 %add2, %add240
5055 %mul541 = add i32 %add4, %add240
5056 %add6 = mul i32 %mul541, %add2
5057 %add642 = add i32 %add4, 1
5058 %add8 = mul i32 %add6, %add642
5059 %mul943 = add i32 %add8, %add642
5060 %add10 = mul i32 %mul943, %add6
; Tail of the chain: two "+1" values (%add1044, %add1246) feed the last two
; multiplies, so the final operation here is a mul rather than an add.
5061 %add1044 = add i32 %add8, 1
5062 %add1246 = add i32 %add10, 1
5063 %mul1345 = mul i32 %add10, %add1044
5064 %add14 = mul i32 %mul1345, %add1246
5068 define <2 x i32> @clpeak_imad_pat_v2i32_x2(<2 x i32> %x, <2 x i32> %y) {
5069 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
5070 ; GFX67-SDAG: ; %bb.0: ; %entry
5071 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5072 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5073 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5074 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2
5075 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3
5076 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v5, v0
5077 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v4, v1
5078 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
5079 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
5080 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v5
5081 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5082 ; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v4
5083 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5084 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v3, v2
5085 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5086 ; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, v5, v4
5087 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v4, v1
5088 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, 1, v3
5089 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5090 ; GFX67-SDAG-NEXT: v_add_i32_e32 v4, vcc, 1, v5
5091 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5092 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v3, v2
5093 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5094 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v5, v4
5095 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1
5096 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v2, v0, v3
5097 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5
5098 ; GFX67-SDAG-NEXT: v_add_i32_e32 v2, vcc, v2, v0
5099 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5100 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, v3, v1
5101 ; GFX67-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
5102 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, v0, v2
5103 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, v1, v3
5104 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
5106 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
5107 ; GFX67-GISEL: ; %bb.0: ; %entry
5108 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5109 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5110 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5111 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5112 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5113 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
5114 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
5115 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5116 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5117 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
5118 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v5
5119 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2
5120 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4
5121 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
5122 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
5123 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5124 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
5125 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v3
5126 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v5
5127 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2
5128 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4
5129 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v3, v2
5130 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, v5, v4
5131 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5132 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
5133 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v3
5134 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
5135 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v0
5136 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5137 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3
5138 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5139 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
5140 ; GFX67-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1
5141 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
5143 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
5144 ; GFX8-SDAG: ; %bb.0: ; %entry
5145 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5146 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, 1, v0
5147 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, 1, v1
5148 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2
5149 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3
5150 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v5, v0
5151 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v4, v1
5152 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
5153 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
5154 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v5
5155 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5156 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, 1, v4
5157 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5158 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v3, v2
5159 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5160 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, v5, v4
5161 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v4, v1
5162 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, 1, v3
5163 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5164 ; GFX8-SDAG-NEXT: v_add_u32_e32 v4, vcc, 1, v5
5165 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5166 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v3, v2
5167 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5168 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v5, v4
5169 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1
5170 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v2, v0, v3
5171 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v3, v1, v5
5172 ; GFX8-SDAG-NEXT: v_add_u32_e32 v2, vcc, v2, v0
5173 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5174 ; GFX8-SDAG-NEXT: v_add_u32_e32 v3, vcc, v3, v1
5175 ; GFX8-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
5176 ; GFX8-SDAG-NEXT: v_add_u32_e32 v0, vcc, v0, v2
5177 ; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, v1, v3
5178 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
5180 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
5181 ; GFX8-GISEL: ; %bb.0: ; %entry
5182 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5183 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, 1, v0
5184 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
5185 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5186 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5187 ; GFX8-GISEL-NEXT: v_add_u32_e32 v0, vcc, v4, v0
5188 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, v5, v1
5189 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5190 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5191 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v4
5192 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v5
5193 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2
5194 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4
5195 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v3, v2
5196 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, v5, v4
5197 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5198 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
5199 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v3
5200 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v5
5201 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v3, v0, v2
5202 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v5, v1, v4
5203 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, v3, v2
5204 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, v5, v4
5205 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5206 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v4, v1
5207 ; GFX8-GISEL-NEXT: v_add_u32_e32 v2, vcc, 1, v3
5208 ; GFX8-GISEL-NEXT: v_add_u32_e32 v3, vcc, 1, v5
5209 ; GFX8-GISEL-NEXT: v_add_u32_e32 v4, vcc, 1, v0
5210 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5211 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v2, v1, v3
5212 ; GFX8-GISEL-NEXT: v_add_u32_e32 v1, vcc, 1, v1
5213 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
5214 ; GFX8-GISEL-NEXT: v_mul_lo_u32 v1, v2, v1
5215 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
5217 ; GFX900-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
5218 ; GFX900-SDAG: ; %bb.0: ; %entry
5219 ; GFX900-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5220 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
5221 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
5222 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
5223 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
5224 ; GFX900-SDAG-NEXT: v_add_u32_e32 v0, v4, v0
5225 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
5226 ; GFX900-SDAG-NEXT: v_add_u32_e32 v1, v5, v1
5227 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
5228 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v4
5229 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5230 ; GFX900-SDAG-NEXT: v_add_u32_e32 v4, 1, v5
5231 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5232 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v3, v2
5233 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5234 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v5, v4
5235 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v1, v2, v1
5236 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, 1, v3
5237 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v3, v0, v2
5238 ; GFX900-SDAG-NEXT: v_add_u32_e32 v4, 1, v5
5239 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v5, v1, v4
5240 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v3, v2
5241 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5242 ; GFX900-SDAG-NEXT: v_add_u32_e32 v2, v5, v4
5243 ; GFX900-SDAG-NEXT: v_mul_lo_u32 v2, v2, v1
5244 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v0, v3, v[0:1]
5245 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v5, v[2:3]
5246 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v0, v[3:4]
5247 ; GFX900-SDAG-NEXT: v_mad_u64_u32 v[1:2], s[4:5], v4, v2, v[4:5]
5248 ; GFX900-SDAG-NEXT: s_setpc_b64 s[30:31]
5250 ; GFX900-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
5251 ; GFX900-GISEL: ; %bb.0: ; %entry
5252 ; GFX900-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5253 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
5254 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
5255 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5256 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5257 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
5258 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, v5, v1
5259 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5260 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5261 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5262 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5263 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5264 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5265 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v4, v2
5266 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, v5, v3
5267 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5268 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5269 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5270 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5271 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5272 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5273 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, v4, v2
5274 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, v5, v3
5275 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5276 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5277 ; GFX900-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5278 ; GFX900-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5279 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2
5280 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3
5281 ; GFX900-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
5282 ; GFX900-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
5283 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5284 ; GFX900-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5285 ; GFX900-GISEL-NEXT: s_setpc_b64 s[30:31]
5287 ; GFX90A-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
5288 ; GFX90A-SDAG: ; %bb.0: ; %entry
5289 ; GFX90A-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5290 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, 1, v1
5291 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, 1, v0
5292 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
5293 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
5294 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v1, v5, v1
5295 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v0, v4, v0
5296 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
5297 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
5298 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v4
5299 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v5
5300 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v3
5301 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v5, v0, v2
5302 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, v5, v2
5303 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v4, v3
5304 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
5305 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5306 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, 1, v4
5307 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, 1, v5
5308 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v6, v0, v3
5309 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v4, v1, v2
5310 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v2, v4, v2
5311 ; GFX90A-SDAG-NEXT: v_add_u32_e32 v3, v6, v3
5312 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v0, v3, v0
5313 ; GFX90A-SDAG-NEXT: v_mul_lo_u32 v2, v2, v1
5314 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, v[2:3]
5315 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v0, v6, v[0:1]
5316 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v0, v[6:7]
5317 ; GFX90A-SDAG-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v2, v[4:5]
5318 ; GFX90A-SDAG-NEXT: v_mov_b32_e32 v1, v2
5319 ; GFX90A-SDAG-NEXT: s_setpc_b64 s[30:31]
5321 ; GFX90A-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
5322 ; GFX90A-GISEL: ; %bb.0: ; %entry
5323 ; GFX90A-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5324 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, 1, v0
5325 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, 1, v1
5326 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5327 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5328 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v0, v4, v0
5329 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v1, v5, v1
5330 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5331 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5332 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5333 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5334 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5335 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5336 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v4, v2
5337 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v5, v3
5338 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5339 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5340 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5341 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5342 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5343 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5344 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, v4, v2
5345 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, v5, v3
5346 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5347 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5348 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v2, 1, v4
5349 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v3, 1, v5
5350 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v4, 1, v0
5351 ; GFX90A-GISEL-NEXT: v_add_u32_e32 v5, 1, v1
5352 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5353 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5354 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v0, v0, v4
5355 ; GFX90A-GISEL-NEXT: v_mul_lo_u32 v1, v1, v5
5356 ; GFX90A-GISEL-NEXT: s_setpc_b64 s[30:31]
5358 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i32_x2:
5359 ; GFX10-SDAG: ; %bb.0: ; %entry
5360 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5361 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, 1, v0
5362 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, 1, v1
5363 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
5364 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
5365 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v0, v4, v0
5366 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, v5, v1
5367 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v0, v2
5368 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4
5369 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v1, v1, v3
5370 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5
5371 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
5372 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
5373 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2
5374 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3
5375 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5376 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, 1, v4
5377 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v1, v3, v1
5378 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, 1, v5
5379 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v4, v0, v2
5380 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v5, v1, v3
5381 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v2, v4, v2
5382 ; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v3, v5, v3
5383 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v0, v2, v0
5384 ; GFX10-SDAG-NEXT: v_mul_lo_u32 v2, v3, v1
5385 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[3:4], null, v0, v4, v[0:1]
5386 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[4:5], null, v2, v5, v[2:3]
5387 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[0:1], null, v3, v0, v[3:4]
5388 ; GFX10-SDAG-NEXT: v_mad_u64_u32 v[1:2], null, v4, v2, v[4:5]
5389 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5391 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i32_x2:
5392 ; GFX10-GISEL: ; %bb.0: ; %entry
5393 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5394 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
5395 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
5396 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5397 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5398 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, v4, v0
5399 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, v5, v1
5400 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v0, v2
5401 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v1, v3
5402 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4
5403 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5
5404 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5405 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5406 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2
5407 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3
5408 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5409 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5410 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4
5411 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5
5412 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v4, v0, v2
5413 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v5, v1, v3
5414 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, v4, v2
5415 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, v5, v3
5416 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5417 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5418 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v2, 1, v4
5419 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v3, 1, v5
5420 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v2, v0, v2
5421 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v3, v1, v3
5422 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, 1, v0
5423 ; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v1, 1, v1
5424 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v0, v2, v0
5425 ; GFX10-GISEL-NEXT: v_mul_lo_u32 v1, v3, v1
5426 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
5428 %y38 = add <2 x i32> %x, <i32 1, i32 1>
5429 %add = mul <2 x i32> %y38, %y
5430 %mul139 = add <2 x i32> %add, %y38
5431 %add2 = mul <2 x i32> %mul139, %y
5432 %add240 = add <2 x i32> %add, <i32 1, i32 1>
5433 %add4 = mul <2 x i32> %add2, %add240
5434 %mul541 = add <2 x i32> %add4, %add240
5435 %add6 = mul <2 x i32> %mul541, %add2
5436 %add642 = add <2 x i32> %add4, <i32 1, i32 1>
5437 %add8 = mul <2 x i32> %add6, %add642
5438 %mul943 = add <2 x i32> %add8, %add642
5439 %add10 = mul <2 x i32> %mul943, %add6
5440 %add1044 = add <2 x i32> %add8, <i32 1, i32 1>
5441 %add1246 = add <2 x i32> %add10, <i32 1, i32 1>
5442 %mul1345 = mul <2 x i32> %add10, %add1044
5443 %add14 = mul <2 x i32> %mul1345, %add1246
5444 ret <2 x i32> %add14
; clpeak integer-MAD pattern, scalar i16, with signext arguments and return.
; The IR body is a chain of eight multiplies in which every step has the
; shape (a + 1) * b, i.e. a * b + b, so each step is a candidate for a single
; multiply-add instruction. In the assertions below, the -global-isel=0 runs
; for gfx803/gfx900/gfx90a/gfx1030 expect exactly eight 16-bit mad
; instructions, while the gfx600/gfx700 runs expect 24-bit multiply/mad
; instructions on operands masked with 0xffff. Every run expects
; v_bfe_i32 v0, v0, 0, 16 immediately before s_setpc_b64 (presumably the
; sign extension required by the signext return).
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5447 define signext i16 @clpeak_imad_pat_i16_x2(i16 signext %x, i16 signext %y) {
5448 ; GFX67-SDAG-LABEL: clpeak_imad_pat_i16_x2:
5449 ; GFX67-SDAG: ; %bb.0: ; %entry
5450 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5451 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5452 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5453 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v1
5454 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v0, v2
5455 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5456 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5457 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5458 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
5459 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
5460 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5461 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
5462 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
5463 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
5464 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5465 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
5466 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
5467 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
5468 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5469 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
5470 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v2
5471 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
5472 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5473 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
5474 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v1, v3
5475 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
5476 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5477 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v4
5478 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5479 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
5480 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5481 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5482 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5483 ; GFX67-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
5484 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
5486 ; GFX67-GISEL-LABEL: clpeak_imad_pat_i16_x2:
5487 ; GFX67-GISEL: ; %bb.0: ; %entry
5488 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5489 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5490 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5491 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5492 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5493 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5494 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5495 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5496 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5497 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5498 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5499 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5500 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5501 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5502 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5503 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5504 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5505 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5506 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5507 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5508 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5509 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5510 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5511 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5512 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5513 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5514 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5515 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5516 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5517 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5518 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5519 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5520 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5521 ; GFX67-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
5522 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
5524 ; GFX8-SDAG-LABEL: clpeak_imad_pat_i16_x2:
5525 ; GFX8-SDAG: ; %bb.0: ; %entry
5526 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5527 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5528 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5529 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5530 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5531 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5532 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5533 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5534 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
5535 ; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
5536 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
5538 ; GFX8-GISEL-LABEL: clpeak_imad_pat_i16_x2:
5539 ; GFX8-GISEL: ; %bb.0: ; %entry
5540 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5541 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
5542 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
5543 ; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
5544 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
5545 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
5546 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4
5547 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v3, v2, 1
5548 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1
5549 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1
5550 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3
5551 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
5552 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4
5553 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
5554 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
5555 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v4, 1
5556 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
5557 ; GFX8-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
5558 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
5560 ; GFX9-SDAG-LABEL: clpeak_imad_pat_i16_x2:
5561 ; GFX9-SDAG: ; %bb.0: ; %entry
5562 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5563 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5564 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5565 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5566 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5567 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5568 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5569 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5570 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
5571 ; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
5572 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5574 ; GFX9-GISEL-LABEL: clpeak_imad_pat_i16_x2:
5575 ; GFX9-GISEL: ; %bb.0: ; %entry
5576 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5577 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
5578 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
5579 ; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
5580 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
5581 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
5582 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4
5583 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v3, v2, 1
5584 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1
5585 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v4, 1
5586 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3
5587 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v2, v1, 1
5588 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4
5589 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v3, 1
5590 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
5591 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v4, 1
5592 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
5593 ; GFX9-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
5594 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5596 ; GFX10-SDAG-LABEL: clpeak_imad_pat_i16_x2:
5597 ; GFX10-SDAG: ; %bb.0: ; %entry
5598 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5599 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5600 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5601 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5602 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5603 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5604 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5605 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5606 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
5607 ; GFX10-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16
5608 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5610 ; GFX10-GISEL-LABEL: clpeak_imad_pat_i16_x2:
5611 ; GFX10-GISEL: ; %bb.0: ; %entry
5612 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5613 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5614 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
5615 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5616 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
5617 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5618 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5619 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5620 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
5621 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5622 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5623 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5624 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
5625 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5626 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5627 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5628 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5629 ; GFX10-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
5630 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Recurrence: p[n] = (p[n-2] + 1) * p[n-1], seeded with p[-1] = %x and
; p[0] = %y, for eight products (%add .. %add44). Only the final multiply
; places the product operand first and the incremented operand second.
5632 %conv69 = add i16 %x, 1
5633 %add = mul i16 %conv69, %y
5634 %conv470 = add i16 %y, 1
5635 %add8 = mul i16 %conv470, %add
5636 %conv1071 = add i16 %add, 1
5637 %add14 = mul i16 %conv1071, %add8
5638 %conv1672 = add i16 %add8, 1
5639 %add20 = mul i16 %conv1672, %add14
5640 %conv2273 = add i16 %add14, 1
5641 %add26 = mul i16 %conv2273, %add20
5642 %conv2874 = add i16 %add20, 1
5643 %add32 = mul i16 %conv2874, %add26
5644 %conv3475 = add i16 %add26, 1
5645 %add38 = mul i16 %conv3475, %add32
5646 %conv4076 = add i16 %add32, 1
5647 %add44 = mul i16 %add38, %conv4076
; clpeak integer-MAD pattern, scalar i16, with zeroext arguments and return.
; The visible IR body is identical to the one in @clpeak_imad_pat_i16_x2;
; only the parameter/return extension attributes differ (zeroext here).
; Every step has the shape (a + 1) * b, i.e. a * b + b, so each of the eight
; multiplies is a candidate for a single multiply-add instruction. In the
; assertions below, the -global-isel=0 runs for gfx803/gfx900/gfx90a/gfx1030
; expect exactly eight 16-bit mad instructions. The gfx600/gfx700 and gfx1030
; runs expect a final v_and_b32 with 0xffff before s_setpc_b64, whereas the
; gfx803/gfx900/gfx90a runs expect no final mask.
; NOTE(review): the missing final mask on gfx8/gfx9 presumably relies on the
; 16-bit instructions there clearing the upper result bits - confirm against
; the AMDGPU backend before relying on it.
; The assertion lines are autogenerated (see the NOTE on the first line of
; this file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
5651 define zeroext i16 @clpeak_umad_pat_i16_x2(i16 zeroext %x, i16 zeroext %y) {
5652 ; GFX67-SDAG-LABEL: clpeak_umad_pat_i16_x2:
5653 ; GFX67-SDAG: ; %bb.0: ; %entry
5654 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5655 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5656 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5657 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v0, v1
5658 ; GFX67-SDAG-NEXT: v_add_i32_e32 v3, vcc, 1, v1
5659 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5660 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5661 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v3, v2
5662 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
5663 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5664 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
5665 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
5666 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v3, v2, 1
5667 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5668 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
5669 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
5670 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
5671 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5672 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
5673 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
5674 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v2, v3, 1
5675 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5676 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v4
5677 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v2, v3
5678 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, 1
5679 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5680 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v4
5681 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5682 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v2, v3, 1
5683 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5684 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5685 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5686 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5687 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
5689 ; GFX67-GISEL-LABEL: clpeak_umad_pat_i16_x2:
5690 ; GFX67-GISEL: ; %bb.0: ; %entry
5691 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5692 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5693 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5694 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5695 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5696 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5697 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5698 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5699 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5700 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5701 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5702 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5703 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5704 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5705 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5706 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5707 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5708 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5709 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5710 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5711 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5712 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5713 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
5714 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
5715 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5716 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5717 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v1
5718 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5719 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5720 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5721 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5722 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v1
5723 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5724 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
5726 ; GFX8-SDAG-LABEL: clpeak_umad_pat_i16_x2:
5727 ; GFX8-SDAG: ; %bb.0: ; %entry
5728 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5729 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5730 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5731 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5732 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5733 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5734 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5735 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5736 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
5737 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
5739 ; GFX8-GISEL-LABEL: clpeak_umad_pat_i16_x2:
5740 ; GFX8-GISEL: ; %bb.0: ; %entry
5741 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5742 ; GFX8-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
5743 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
5744 ; GFX8-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
5745 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
5746 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, 1
5747 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4
5748 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v3, v2, 1
5749 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1
5750 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v4, 1
5751 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3
5752 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
5753 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4
5754 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
5755 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
5756 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v1, v4, 1
5757 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
5758 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
5760 ; GFX9-SDAG-LABEL: clpeak_umad_pat_i16_x2:
5761 ; GFX9-SDAG: ; %bb.0: ; %entry
5762 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5763 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5764 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5765 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5766 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5767 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5768 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v1, v0, v1, v0
5769 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v1, v0, v1
5770 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v0
5771 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
5773 ; GFX9-GISEL-LABEL: clpeak_umad_pat_i16_x2:
5774 ; GFX9-GISEL: ; %bb.0: ; %entry
5775 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5776 ; GFX9-GISEL-NEXT: v_add_u16_e32 v0, 1, v0
5777 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v0, v1
5778 ; GFX9-GISEL-NEXT: v_add_u16_e32 v3, 1, v1
5779 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v3, v2
5780 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, 1
5781 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v1, v0, v4
5782 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v3, v2, 1
5783 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v3, v2, v1
5784 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v4, 1
5785 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v4, v0, v3
5786 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v2, v1, 1
5787 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v2, v1, v4
5788 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v3, 1
5789 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v2
5790 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v1, v1, v4, 1
5791 ; GFX9-GISEL-NEXT: v_mul_lo_u16_e32 v0, v0, v1
5792 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
5794 ; GFX10-SDAG-LABEL: clpeak_umad_pat_i16_x2:
5795 ; GFX10-SDAG: ; %bb.0: ; %entry
5796 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5797 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5798 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5799 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5800 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5801 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5802 ; GFX10-SDAG-NEXT: v_mad_u16 v1, v0, v1, v0
5803 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v1, v0, v1
5804 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v0
5805 ; GFX10-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5806 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
5808 ; GFX10-GISEL-LABEL: clpeak_umad_pat_i16_x2:
5809 ; GFX10-GISEL: ; %bb.0: ; %entry
5810 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5811 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5812 ; GFX10-GISEL-NEXT: v_add_nc_u16 v2, v1, 1
5813 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5814 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v2, v0
5815 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5816 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5817 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5818 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
5819 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5820 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5821 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5822 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v1, v0
5823 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, 1
5824 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5825 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v1, 1
5826 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
5827 ; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5828 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; Recurrence: p[n] = (p[n-2] + 1) * p[n-1], seeded with p[-1] = %x and
; p[0] = %y, for eight products (%add .. %add44). Only the final multiply
; places the product operand first and the incremented operand second.
5830 %conv69 = add i16 %x, 1
5831 %add = mul i16 %conv69, %y
5832 %conv470 = add i16 %y, 1
5833 %add8 = mul i16 %conv470, %add
5834 %conv1071 = add i16 %add, 1
5835 %add14 = mul i16 %conv1071, %add8
5836 %conv1672 = add i16 %add8, 1
5837 %add20 = mul i16 %conv1672, %add14
5838 %conv2273 = add i16 %add14, 1
5839 %add26 = mul i16 %conv2273, %add20
5840 %conv2874 = add i16 %add20, 1
5841 %add32 = mul i16 %conv2874, %add26
5842 %conv3475 = add i16 %add26, 1
5843 %add38 = mul i16 %conv3475, %add32
5844 %conv4076 = add i16 %add32, 1
5845 %add44 = mul i16 %add38, %conv4076
; clpeak_imad_pat_v2i16_x2 - <2 x i16> variant of the chained add/mul test.
; The IR computes (%x + 1) * %y and then keeps feeding each product into
; further vector adds and muls, returning the final product %add14.
; Expected code per target, as captured by the checks below:
;   gfx6/gfx7  - each lane is kept in its own 32-bit register with 0xffff
;                masking; the SelectionDAG output uses v_mad_u32_u24, the
;                GlobalISel output uses v_mul_u32_u24 plus v_add_i32.
;   gfx8       - per-lane v_mad_u16 and v_mul_lo_u16.
;   gfx9/gfx10 - packed v_pk_mul_lo_u16 and v_pk_add_u16 only; the
;                SelectionDAG output writes "+1" as v_pk_sub_u16 with -1.
5849 define <2 x i16> @clpeak_imad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
5850 ; GFX67-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
5851 ; GFX67-SDAG: ; %bb.0: ; %entry
5852 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5853 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5854 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
5855 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5856 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5857 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
5858 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5859 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
5860 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5861 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
5862 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5863 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5864 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v4, v2, 1
5865 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
5866 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v2
5867 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v5, v3, 1
5868 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5869 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v3
5870 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5871 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v0, v4, v2
5872 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5873 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v5, v3
5874 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5875 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v2, v0
5876 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
5877 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v1
5878 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
5879 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v5, 1
5880 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5881 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
5882 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5883 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v2, v4, v0
5884 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5885 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v5, v1
5886 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5887 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2
5888 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v2, v4, 1
5889 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3
5890 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
5891 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v3, v5, 1
5892 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
5893 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6
5894 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
5895 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5
5896 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4
5897 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7
5898 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
5899 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
5900 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
5901 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
5902 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
5903 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
5904 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1
5905 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
5907 ; GFX67-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
5908 ; GFX67-GISEL: ; %bb.0: ; %entry
5909 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5910 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
5911 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
5912 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
5913 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
5914 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
5915 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
5916 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
5917 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5918 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
5919 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5920 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
5921 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
5922 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
5923 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
5924 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5925 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5926 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
5927 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
5928 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
5929 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5930 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
5931 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
5932 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5933 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
5934 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
5935 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
5936 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
5937 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
5938 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
5939 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5940 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5941 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
5942 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
5943 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
5944 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
5945 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
5946 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5947 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5948 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
5949 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
5950 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5951 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5952 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
5953 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
5954 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
5955 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
5956 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
5957 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
5958 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
5959 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
5960 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
5961 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5962 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5963 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
5964 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
5965 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
5966 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
5967 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
5968 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5969 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5970 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
5971 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
5972 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
5973 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5974 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
5975 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
5976 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
5977 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
5978 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5979 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5980 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
5981 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
5982 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
5983 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
5984 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
5985 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5986 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
5987 ; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
5988 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
5989 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5990 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
5991 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5992 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
5993 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
5994 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
5995 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
5996 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
5997 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
5998 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
5999 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
6000 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6002 ; GFX8-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
6003 ; GFX8-SDAG: ; %bb.0: ; %entry
6004 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6005 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1
6006 ; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0
6007 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6008 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
6009 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v3, v0
6010 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v2, v1, v2
6011 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v4, v3
6012 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, 1
6013 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v5, v1
6014 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v2, v1, 1
6015 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v0, v0
6016 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v5, v1, v1
6017 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v2, v4
6018 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, 1
6019 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v3, v5
6020 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, 1
6021 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v2, v0
6022 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v0
6023 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v3, v1
6024 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v1
6025 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2
6026 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
6027 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0
6028 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1
6029 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
6030 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6031 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
6032 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
6033 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
6035 ; GFX8-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
6036 ; GFX8-GISEL: ; %bb.0: ; %entry
6037 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6038 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1
6039 ; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0
6040 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6041 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
6042 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2
6043 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0
6044 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v4, v1
6045 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6046 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
6047 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
6048 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v1, v1
6049 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v0, v0
6050 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v2, v4
6051 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v3, v5
6052 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1
6053 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v5, v0, 1
6054 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v1
6055 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v3, v0, v0
6056 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2
6057 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
6058 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
6059 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v3, v0, 1
6060 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
6061 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
6062 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v6, v1
6063 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0
6064 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v2
6065 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6066 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6067 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
6069 ; GFX9-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
6070 ; GFX9-SDAG: ; %bb.0: ; %entry
6071 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6072 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
6073 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6074 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
6075 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6076 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6077 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6078 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6079 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6080 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6081 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6082 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6083 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6084 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6085 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
6086 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6087 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6088 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6090 ; GFX9-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
6091 ; GFX9-GISEL: ; %bb.0: ; %entry
6092 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6093 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
6094 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6095 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
6096 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6097 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6098 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6099 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6100 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6101 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6102 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6103 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6104 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6105 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6106 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
6107 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6108 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6109 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6111 ; GFX10-SDAG-LABEL: clpeak_imad_pat_v2i16_x2:
6112 ; GFX10-SDAG: ; %bb.0: ; %entry
6113 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6114 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
6115 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6116 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
6117 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
6118 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6119 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
6120 ; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
6121 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
6122 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
6123 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6124 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6125 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6126 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6127 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
6128 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6129 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6130 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6132 ; GFX10-GISEL-LABEL: clpeak_imad_pat_v2i16_x2:
6133 ; GFX10-GISEL: ; %bb.0: ; %entry
6134 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6135 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
6136 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6137 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
6138 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
6139 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6140 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
6141 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
6142 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
6143 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
6144 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6145 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6146 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6147 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6148 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
6149 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6150 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6151 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated from it, so any change
; to these instructions requires regenerating the checks.
6153 %y38 = add <2 x i16> %x, <i16 1, i16 1>
6154 %add = mul <2 x i16> %y38, %y
6155 %mul139 = add <2 x i16> %add, %y38
6156 %add2 = mul <2 x i16> %mul139, %y
6157 %add240 = add <2 x i16> %add, <i16 1, i16 1>
6158 %add4 = mul <2 x i16> %add2, %add240
6159 %mul541 = add <2 x i16> %add4, %add240
6160 %add6 = mul <2 x i16> %mul541, %add2
6161 %add642 = add <2 x i16> %add4, <i16 1, i16 1>
6162 %add8 = mul <2 x i16> %add6, %add642
6163 %mul943 = add <2 x i16> %add8, %add642
6164 %add10 = mul <2 x i16> %mul943, %add6
6165 %add1044 = add <2 x i16> %add8, <i16 1, i16 1>
6166 %add1246 = add <2 x i16> %add10, <i16 1, i16 1>
6167 %mul1345 = mul <2 x i16> %add10, %add1044
6168 %add14 = mul <2 x i16> %mul1345, %add1246
6169 ret <2 x i16> %add14
; clpeak_umad_pat_v2i16_x2 - <2 x i16> chained add/mul test, "umad" flavour.
; The IR computes (%x + 1) * %y and then keeps feeding each product into
; further vector adds and muls, returning the final product %add14.
; Expected code per target, as captured by the checks below:
;   gfx6/gfx7  - each lane is kept in its own 32-bit register with 0xffff
;                masking; the SelectionDAG output uses v_mad_u32_u24, the
;                GlobalISel output uses v_mul_u32_u24 plus v_add_i32.
;   gfx8       - per-lane v_mad_u16 and v_mul_lo_u16.
;   gfx9/gfx10 - packed v_pk_mul_lo_u16 and v_pk_add_u16 only; the
;                SelectionDAG output writes "+1" as v_pk_sub_u16 with -1.
; NOTE(review): the IR body and every expected instruction here are identical
; to clpeak_imad_pat_v2i16_x2 (only the function name differs) - confirm
; whether a distinct unsigned pattern was intended for this test.
6172 define <2 x i16> @clpeak_umad_pat_v2i16_x2(<2 x i16> %x, <2 x i16> %y) {
6173 ; GFX67-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
6174 ; GFX67-SDAG: ; %bb.0: ; %entry
6175 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6176 ; GFX67-SDAG-NEXT: v_add_i32_e32 v0, vcc, 1, v0
6177 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
6178 ; GFX67-SDAG-NEXT: v_add_i32_e32 v1, vcc, 1, v1
6179 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6180 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
6181 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6182 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v2, v0
6183 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6184 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v5, v3, v1
6185 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6186 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v0, v2
6187 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v4, v2, 1
6188 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v1, v3
6189 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v2
6190 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v5, v3, 1
6191 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6192 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v3
6193 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6194 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v0, v4, v2
6195 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6196 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v5, v3
6197 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6198 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v2, v0
6199 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v4, 1
6200 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v1
6201 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
6202 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v5, 1
6203 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6204 ; GFX67-SDAG-NEXT: v_and_b32_e32 v5, 0xffff, v1
6205 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6206 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v2, v4, v0
6207 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6208 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v3, v5, v1
6209 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6210 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v6, v0, v2
6211 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v2, v4, 1
6212 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v1, v3
6213 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
6214 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v3, v5, 1
6215 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, 1
6216 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v6
6217 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, 1
6218 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v5
6219 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v3, v3, v4
6220 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v7
6221 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v2, v4, v2
6222 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6223 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6224 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6225 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6226 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v0, v3, v0
6227 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v1, v2, v1
6228 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
6230 ; GFX67-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
6231 ; GFX67-GISEL: ; %bb.0: ; %entry
6232 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6233 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, 1, v1
6234 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v0
6235 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v1
6236 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v0
6237 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
6238 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
6239 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
6240 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6241 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
6242 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6243 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v5, v3
6244 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v4, v2
6245 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v5, v1
6246 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v4, v0
6247 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6248 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6249 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6250 ; GFX67-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
6251 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0
6252 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6253 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v3
6254 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
6255 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
6256 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
6257 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
6258 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
6259 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
6260 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
6261 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
6262 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6263 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6264 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
6265 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
6266 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
6267 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
6268 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
6269 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6270 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6271 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
6272 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
6273 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
6274 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6275 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
6276 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
6277 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
6278 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
6279 ; GFX67-GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v3
6280 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v2
6281 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5
6282 ; GFX67-GISEL-NEXT: v_or_b32_e32 v4, v4, v5
6283 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v4
6284 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6285 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6286 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
6287 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v5, v1, v5
6288 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v4, v0, v4
6289 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v5, v3
6290 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v4, v2
6291 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6292 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6293 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
6294 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
6295 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v2
6296 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6297 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v3, v1
6298 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v5
6299 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v2, v0
6300 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, 1, v4
6301 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6302 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6303 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
6304 ; GFX67-GISEL-NEXT: v_add_i32_e32 v4, vcc, 1, v1
6305 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
6306 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, 1, v0
6307 ; GFX67-GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
6308 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6309 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4
6310 ; GFX67-GISEL-NEXT: v_or_b32_e32 v3, v3, v4
6311 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v2
6312 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6313 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6314 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6315 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
6316 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v4
6317 ; GFX67-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v3
6318 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6319 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
6320 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6321 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v3
6322 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
6323 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6325 ; GFX8-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
6326 ; GFX8-SDAG: ; %bb.0: ; %entry
6327 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6328 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v3, 1
6329 ; GFX8-SDAG-NEXT: v_add_u16_e32 v2, 1, v0
6330 ; GFX8-SDAG-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6331 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v3, 16, v1
6332 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v0, v3, v0
6333 ; GFX8-SDAG-NEXT: v_mad_u16 v5, v2, v1, v2
6334 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v4, v3
6335 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v3, 1
6336 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v5, v1
6337 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v2, v1, 1
6338 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v4, v0, v0
6339 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v5, v1, v1
6340 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v2, v2, v4
6341 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v4, v0, 1
6342 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v3, v3, v5
6343 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v5, v1, 1
6344 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v4, v2, v0
6345 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v0
6346 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v5, v3, v1
6347 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v1
6348 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v0, v0, v2
6349 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v1, v1, v3
6350 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v4, v0
6351 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v1, v5, v1
6352 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v2, v0, v2
6353 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6354 ; GFX8-SDAG-NEXT: v_mad_u16 v1, v3, v1, v3
6355 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v1, v0
6356 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
6358 ; GFX8-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
6359 ; GFX8-GISEL: ; %bb.0: ; %entry
6360 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6361 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v3, 1
6362 ; GFX8-GISEL-NEXT: v_add_u16_e32 v2, 1, v0
6363 ; GFX8-GISEL-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
6364 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1
6365 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v2
6366 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v0, v3, v0
6367 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v4, v4, v1
6368 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
6369 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
6370 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v3, 1
6371 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v1, v1
6372 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v0, v0
6373 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v2, v2, v4
6374 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v3, v3, v5
6375 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v1, 1
6376 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v5, v0, 1
6377 ; GFX8-GISEL-NEXT: v_mad_u16 v4, v2, v1, v1
6378 ; GFX8-GISEL-NEXT: v_mad_u16 v5, v3, v0, v0
6379 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v4, v2
6380 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v7, v5, v3
6381 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v2, v1, 1
6382 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v3, v0, 1
6383 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v4, v2, 1
6384 ; GFX8-GISEL-NEXT: v_mad_u16 v3, v5, v3, 1
6385 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v6, v1
6386 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v0, v7, v0
6387 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v1, v1, v2
6388 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6389 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v1, v0
6390 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
6392 ; GFX9-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
6393 ; GFX9-SDAG: ; %bb.0: ; %entry
6394 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6395 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
6396 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6397 ; GFX9-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
6398 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6399 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6400 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6401 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6402 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6403 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6404 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6405 ; GFX9-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6406 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6407 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6408 ; GFX9-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
6409 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6410 ; GFX9-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6411 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6413 ; GFX9-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
6414 ; GFX9-GISEL: ; %bb.0: ; %entry
6415 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6416 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
6417 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6418 ; GFX9-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
6419 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6420 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6421 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6422 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6423 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6424 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6425 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6426 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6427 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6428 ; GFX9-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6429 ; GFX9-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
6430 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6431 ; GFX9-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6432 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6434 ; GFX10-SDAG-LABEL: clpeak_umad_pat_v2i16_x2:
6435 ; GFX10-SDAG: ; %bb.0: ; %entry
6436 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6437 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v0, v0, -1 op_sel_hi:[1,0]
6438 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6439 ; GFX10-SDAG-NEXT: v_pk_add_u16 v0, v2, v0
6440 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v2, -1 op_sel_hi:[1,0]
6441 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6442 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v1, v0, v2
6443 ; GFX10-SDAG-NEXT: v_pk_add_u16 v2, v1, v2
6444 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v1, -1 op_sel_hi:[1,0]
6445 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v2, v0
6446 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6447 ; GFX10-SDAG-NEXT: v_pk_add_u16 v1, v2, v1
6448 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6449 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v1, v2, -1 op_sel_hi:[1,0]
6450 ; GFX10-SDAG-NEXT: v_pk_sub_u16 v2, v0, -1 op_sel_hi:[1,0]
6451 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6452 ; GFX10-SDAG-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6453 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6455 ; GFX10-GISEL-LABEL: clpeak_umad_pat_v2i16_x2:
6456 ; GFX10-GISEL: ; %bb.0: ; %entry
6457 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6458 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v0, 1 op_sel_hi:[1,0]
6459 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6460 ; GFX10-GISEL-NEXT: v_pk_add_u16 v0, v2, v0
6461 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v2, 1 op_sel_hi:[1,0]
6462 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6463 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v1, v0, v2
6464 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v1, v2
6465 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v1, 1 op_sel_hi:[1,0]
6466 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v2, v0
6467 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v2, v0, v1
6468 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, v1
6469 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v1, v0
6470 ; GFX10-GISEL-NEXT: v_pk_add_u16 v1, v2, 1 op_sel_hi:[1,0]
6471 ; GFX10-GISEL-NEXT: v_pk_add_u16 v2, v0, 1 op_sel_hi:[1,0]
6472 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v1
6473 ; GFX10-GISEL-NEXT: v_pk_mul_lo_u16 v0, v0, v2
6474 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
; IR under test. The assertions above are autogenerated from it, so any change
; to these instructions requires regenerating the checks.
6476 %y38 = add <2 x i16> %x, <i16 1, i16 1>
6477 %add = mul <2 x i16> %y38, %y
6478 %mul139 = add <2 x i16> %add, %y38
6479 %add2 = mul <2 x i16> %mul139, %y
6480 %add240 = add <2 x i16> %add, <i16 1, i16 1>
6481 %add4 = mul <2 x i16> %add2, %add240
6482 %mul541 = add <2 x i16> %add4, %add240
6483 %add6 = mul <2 x i16> %mul541, %add2
6484 %add642 = add <2 x i16> %add4, <i16 1, i16 1>
6485 %add8 = mul <2 x i16> %add6, %add642
6486 %mul943 = add <2 x i16> %add8, %add642
6487 %add10 = mul <2 x i16> %mul943, %add6
6488 %add1044 = add <2 x i16> %add8, <i16 1, i16 1>
6489 %add1246 = add <2 x i16> %add10, <i16 1, i16 1>
6490 %mul1345 = mul <2 x i16> %add10, %add1044
6491 %add14 = mul <2 x i16> %mul1345, %add1246
6492 ret <2 x i16> %add14
6495 ; Multiple uses of mul with different variable addend
; A single i32 product (%x * %y) feeds two separate adds, one with %z0 and one
; with %z1, and the two sums are packed into a <2 x i32> result.
; The expected output on every tested subtarget is one v_mul_lo_u32 followed by
; two plain adds, i.e. the shared product is kept and no mad is formed.
; The check lines are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py instead of editing them by hand.
6496 define <2 x i32> @multi_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z0, i32 %z1) {
6497 ; GFX67-LABEL: multi_use_mul_mad_i32_var:
6498 ; GFX67: ; %bb.0: ; %entry
6499 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6500 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
6501 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v2
6502 ; GFX67-NEXT: v_add_i32_e32 v1, vcc, v1, v3
6503 ; GFX67-NEXT: s_setpc_b64 s[30:31]
6505 ; GFX8-LABEL: multi_use_mul_mad_i32_var:
6506 ; GFX8: ; %bb.0: ; %entry
6507 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6508 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
6509 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v2
6510 ; GFX8-NEXT: v_add_u32_e32 v1, vcc, v1, v3
6511 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6513 ; GFX9-LABEL: multi_use_mul_mad_i32_var:
6514 ; GFX9: ; %bb.0: ; %entry
6515 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6516 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
6517 ; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
6518 ; GFX9-NEXT: v_add_u32_e32 v1, v1, v3
6519 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6521 ; GFX10-LABEL: multi_use_mul_mad_i32_var:
6522 ; GFX10: ; %bb.0: ; %entry
6523 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6524 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
6525 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
6526 ; GFX10-NEXT: v_add_nc_u32_e32 v1, v1, v3
6527 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6529 %mul = mul i32 %x, %y
6530 %add0 = add i32 %mul, %z0
6531 %add1 = add i32 %mul, %z1
6532 %insert.0 = insertelement <2 x i32> poison, i32 %add0, i32 0
6533 %insert.1 = insertelement <2 x i32> %insert.0, i32 %add1, i32 1
6534 ret <2 x i32> %insert.1
; i16 variant of the multi-use test: one product (%x * %y) is added to %z0 and
; to %z1, and the two i16 sums are inserted into a <2 x i16> result.
; Expected output differs per subtarget and instruction selector:
;  - gfx6/gfx7 SelectionDAG: two v_mad_u32_u24 (the multiply is duplicated
;    into each mad); GlobalISel: one v_mul_u32_u24 plus two adds.
;  - gfx8: two v_mad_u16 with either selector.
;  - gfx9: two v_mad_legacy_u16 with either selector.
;  - gfx10 SelectionDAG: two v_mad_u16; GlobalISel: one v_mul_lo_u16 plus two
;    v_add_nc_u16.
; The remaining instructions in each block pack the two halves into v0.
6537 define <2 x i16> @multi_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z0, i16 %z1) {
6538 ; GFX67-SDAG-LABEL: multi_use_mul_mad_i16_var:
6539 ; GFX67-SDAG: ; %bb.0: ; %entry
6540 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6541 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v0
6542 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6543 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v4, v1, v2
6544 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v4, v1, v3
6545 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
6547 ; GFX67-GISEL-LABEL: multi_use_mul_mad_i16_var:
6548 ; GFX67-GISEL: ; %bb.0: ; %entry
6549 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6550 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6551 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6552 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
6553 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
6554 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v3
6555 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6557 ; GFX8-SDAG-LABEL: multi_use_mul_mad_i16_var:
6558 ; GFX8-SDAG: ; %bb.0: ; %entry
6559 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6560 ; GFX8-SDAG-NEXT: v_mad_u16 v3, v0, v1, v3
6561 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
6562 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6563 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
6564 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
6566 ; GFX8-GISEL-LABEL: multi_use_mul_mad_i16_var:
6567 ; GFX8-GISEL: ; %bb.0: ; %entry
6568 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6569 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
6570 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
6571 ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v0, 16, v0
6572 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v2, v0
6573 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
6575 ; GFX9-SDAG-LABEL: multi_use_mul_mad_i16_var:
6576 ; GFX9-SDAG: ; %bb.0: ; %entry
6577 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6578 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v2, v0, v1, v2
6579 ; GFX9-SDAG-NEXT: v_mad_legacy_u16 v0, v0, v1, v3
6580 ; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x5040100
6581 ; GFX9-SDAG-NEXT: v_perm_b32 v0, v0, v2, s4
6582 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
6584 ; GFX9-GISEL-LABEL: multi_use_mul_mad_i16_var:
6585 ; GFX9-GISEL: ; %bb.0: ; %entry
6586 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6587 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v2, v0, v1, v2
6588 ; GFX9-GISEL-NEXT: v_mad_legacy_u16 v0, v0, v1, v3
6589 ; GFX9-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v2
6590 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
6592 ; GFX10-SDAG-LABEL: multi_use_mul_mad_i16_var:
6593 ; GFX10-SDAG: ; %bb.0: ; %entry
6594 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6595 ; GFX10-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
6596 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
6597 ; GFX10-SDAG-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
6598 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6600 ; GFX10-GISEL-LABEL: multi_use_mul_mad_i16_var:
6601 ; GFX10-GISEL: ; %bb.0: ; %entry
6602 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6603 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v0, v0, v1
6604 ; GFX10-GISEL-NEXT: v_add_nc_u16 v1, v0, v2
6605 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v0, v3
6606 ; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6607 ; GFX10-GISEL-NEXT: v_lshl_or_b32 v0, v0, 16, v1
6608 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6610 %mul = mul i16 %x, %y
6611 %add0 = add i16 %mul, %z0
6612 %add1 = add i16 %mul, %z1
6613 %insert.0 = insertelement <2 x i16> poison, i16 %add0, i16 0
6614 %insert.1 = insertelement <2 x i16> %insert.0, i16 %add1, i16 1
6615 ret <2 x i16> %insert.1
; The i32 product (%x * %y) has two different kinds of user: an add with %z and
; a store of the raw product through the addrspace(3) pointer %ptr.
; The expected output on every tested subtarget is a single v_mul_lo_u32 whose
; result is reused by both the add and the ds_write_b32, so no mad is formed.
; The gfx6/gfx7 and gfx8 blocks additionally set m0 to -1 before the DS write;
; the gfx9 and gfx10 blocks do not.
6618 define i32 @other_use_mul_mad_i32_var(i32 %x, i32 %y, i32 %z, ptr addrspace(3) %ptr) {
6619 ; GFX67-LABEL: other_use_mul_mad_i32_var:
6620 ; GFX67: ; %bb.0: ; %entry
6621 ; GFX67-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6622 ; GFX67-NEXT: v_mul_lo_u32 v1, v0, v1
6623 ; GFX67-NEXT: s_mov_b32 m0, -1
6624 ; GFX67-NEXT: v_add_i32_e32 v0, vcc, v1, v2
6625 ; GFX67-NEXT: ds_write_b32 v3, v1
6626 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
6627 ; GFX67-NEXT: s_setpc_b64 s[30:31]
6629 ; GFX8-LABEL: other_use_mul_mad_i32_var:
6630 ; GFX8: ; %bb.0: ; %entry
6631 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6632 ; GFX8-NEXT: v_mul_lo_u32 v1, v0, v1
6633 ; GFX8-NEXT: s_mov_b32 m0, -1
6634 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, v1, v2
6635 ; GFX8-NEXT: ds_write_b32 v3, v1
6636 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6637 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6639 ; GFX9-LABEL: other_use_mul_mad_i32_var:
6640 ; GFX9: ; %bb.0: ; %entry
6641 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6642 ; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1
6643 ; GFX9-NEXT: v_add_u32_e32 v0, v1, v2
6644 ; GFX9-NEXT: ds_write_b32 v3, v1
6645 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6646 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6648 ; GFX10-LABEL: other_use_mul_mad_i32_var:
6649 ; GFX10: ; %bb.0: ; %entry
6650 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6651 ; GFX10-NEXT: v_mul_lo_u32 v1, v0, v1
6652 ; GFX10-NEXT: v_add_nc_u32_e32 v0, v1, v2
6653 ; GFX10-NEXT: ds_write_b32 v3, v1
6654 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6655 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6657 %mul = mul i32 %x, %y
6658 %add0 = add i32 %mul, %z
6659 store i32 %mul, ptr addrspace(3) %ptr
; i16 variant of the "other use" test: the product (%x * %y) is added to %z and
; is also stored as an i16 through the addrspace(3) pointer %ptr.
; Expected output differs per subtarget and instruction selector:
;  - gfx6/gfx7 SelectionDAG: v_mul_u32_u24 for the stored value plus a
;    v_mad_u32_u24 that repeats the multiply; GlobalISel: one multiply + add.
;  - gfx8: v_mul_lo_u16 plus v_mad_u16 (both selectors share these checks).
;  - gfx9: v_mul_lo_u16 plus v_mad_legacy_u16 (both selectors share these).
;  - gfx10 SelectionDAG: v_mul_lo_u16 plus v_mad_u16; GlobalISel: one
;    v_mul_lo_u16 reused by v_add_nc_u16 and by the store.
6663 define i16 @other_use_mul_mad_i16_var(i16 %x, i16 %y, i16 %z, ptr addrspace(3) %ptr) {
6664 ; GFX67-SDAG-LABEL: other_use_mul_mad_i16_var:
6665 ; GFX67-SDAG: ; %bb.0: ; %entry
6666 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6667 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6668 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6669 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v4, v0, v1
6670 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v1, v2
6671 ; GFX67-SDAG-NEXT: s_mov_b32 m0, -1
6672 ; GFX67-SDAG-NEXT: ds_write_b16 v3, v4
6673 ; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6674 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
6676 ; GFX67-GISEL-LABEL: other_use_mul_mad_i16_var:
6677 ; GFX67-GISEL: ; %bb.0: ; %entry
6678 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6679 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6680 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6681 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v0, v1
6682 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v1, v2
6683 ; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
6684 ; GFX67-GISEL-NEXT: ds_write_b16 v3, v1
6685 ; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6686 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6688 ; GFX8-LABEL: other_use_mul_mad_i16_var:
6689 ; GFX8: ; %bb.0: ; %entry
6690 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6691 ; GFX8-NEXT: v_mul_lo_u16_e32 v4, v0, v1
6692 ; GFX8-NEXT: v_mad_u16 v0, v0, v1, v2
6693 ; GFX8-NEXT: s_mov_b32 m0, -1
6694 ; GFX8-NEXT: ds_write_b16 v3, v4
6695 ; GFX8-NEXT: s_waitcnt lgkmcnt(0)
6696 ; GFX8-NEXT: s_setpc_b64 s[30:31]
6698 ; GFX9-LABEL: other_use_mul_mad_i16_var:
6699 ; GFX9: ; %bb.0: ; %entry
6700 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6701 ; GFX9-NEXT: v_mul_lo_u16_e32 v4, v0, v1
6702 ; GFX9-NEXT: v_mad_legacy_u16 v0, v0, v1, v2
6703 ; GFX9-NEXT: ds_write_b16 v3, v4
6704 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6705 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6707 ; GFX10-SDAG-LABEL: other_use_mul_mad_i16_var:
6708 ; GFX10-SDAG: ; %bb.0: ; %entry
6709 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6710 ; GFX10-SDAG-NEXT: v_mul_lo_u16 v4, v0, v1
6711 ; GFX10-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
6712 ; GFX10-SDAG-NEXT: ds_write_b16 v3, v4
6713 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6714 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
6716 ; GFX10-GISEL-LABEL: other_use_mul_mad_i16_var:
6717 ; GFX10-GISEL: ; %bb.0: ; %entry
6718 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6719 ; GFX10-GISEL-NEXT: v_mul_lo_u16 v1, v0, v1
6720 ; GFX10-GISEL-NEXT: v_add_nc_u16 v0, v1, v2
6721 ; GFX10-GISEL-NEXT: ds_write_b16 v3, v1
6722 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6723 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
6725 %mul = mul i16 %x, %y
6726 %add0 = add i16 %mul, %z
6727 store i16 %mul, ptr addrspace(3) %ptr
; <2 x i16> variant of the multi-use test: one vector product (%x * %y) is
; added to %z0 and to %z1, and the shufflevector with mask <0, 1, 2, 3>
; concatenates the two sums into a <4 x i16> result.
; Expected output differs per subtarget and instruction selector:
;  - gfx6/gfx7 SelectionDAG: four v_mad_u32_u24 (one per lane per addend)
;    followed by repacking; GlobalISel: two v_mul_u32_u24 plus four adds.
;  - gfx8: four v_mad_u16 with either selector, with shifts/ors to split and
;    rebuild the packed halves.
;  - gfx9 and gfx10: one v_pk_mul_lo_u16 reused by two v_pk_add_u16.
6731 define <4 x i16> @multi_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z0, <2 x i16> %z1) {
6732 ; GFX67-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
6733 ; GFX67-SDAG: ; %bb.0: ; %entry
6734 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6735 ; GFX67-SDAG-NEXT: v_and_b32_e32 v8, 0xffff, v0
6736 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6737 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6738 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6739 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v5, v1, v3, v5
6740 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v4, v8, v2, v4
6741 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v3, v1, v3, v7
6742 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v2, v8, v2, v6
6743 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v0, 16, v5
6744 ; GFX67-SDAG-NEXT: v_and_b32_e32 v4, 0xffff, v4
6745 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v3
6746 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6747 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v4, v0
6748 ; GFX67-SDAG-NEXT: v_or_b32_e32 v2, v2, v1
6749 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v5
6750 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6751 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
6753 ; GFX67-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
6754 ; GFX67-GISEL: ; %bb.0: ; %entry
6755 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6756 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6757 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6758 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v2, v0, v2
6759 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1
6760 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3
6761 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v3, v0, v1
6762 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v2, v4
6763 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v3, v5
6764 ; GFX67-GISEL-NEXT: v_add_i32_e32 v2, vcc, v2, v6
6765 ; GFX67-GISEL-NEXT: v_add_i32_e32 v3, vcc, v3, v7
6766 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6768 ; GFX8-SDAG-LABEL: multi_use_mul_mad_v2i16_var:
6769 ; GFX8-SDAG: ; %bb.0: ; %entry
6770 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6771 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
6772 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
6773 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v2
6774 ; GFX8-SDAG-NEXT: v_mad_u16 v6, v5, v4, v6
6775 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v6, 16, v6
6776 ; GFX8-SDAG-NEXT: v_mad_u16 v2, v0, v1, v2
6777 ; GFX8-SDAG-NEXT: v_or_b32_e32 v2, v2, v6
6778 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v3
6779 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v5, v4, v6
6780 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
6781 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v3
6782 ; GFX8-SDAG-NEXT: v_or_b32_e32 v1, v0, v4
6783 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, v2
6784 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
6786 ; GFX8-GISEL-LABEL: multi_use_mul_mad_v2i16_var:
6787 ; GFX8-GISEL: ; %bb.0: ; %entry
6788 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6789 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
6790 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
6791 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v2
6792 ; GFX8-GISEL-NEXT: v_mad_u16 v6, v4, v5, v6
6793 ; GFX8-GISEL-NEXT: v_mad_u16 v2, v0, v1, v2
6794 ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v6, 16, v6
6795 ; GFX8-GISEL-NEXT: v_or_b32_e32 v2, v2, v6
6796 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v3
6797 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v3
6798 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v5, v6
6799 ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6800 ; GFX8-GISEL-NEXT: v_or_b32_e32 v1, v0, v1
6801 ; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v2
6802 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
6804 ; GFX9-LABEL: multi_use_mul_mad_v2i16_var:
6805 ; GFX9: ; %bb.0: ; %entry
6806 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6807 ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v0, v1
6808 ; GFX9-NEXT: v_pk_add_u16 v0, v1, v2
6809 ; GFX9-NEXT: v_pk_add_u16 v1, v1, v3
6810 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6812 ; GFX10-LABEL: multi_use_mul_mad_v2i16_var:
6813 ; GFX10: ; %bb.0: ; %entry
6814 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6815 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
6816 ; GFX10-NEXT: v_pk_add_u16 v0, v1, v2
6817 ; GFX10-NEXT: v_pk_add_u16 v1, v1, v3
6818 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6820 %mul = mul <2 x i16> %x, %y
6821 %add0 = add <2 x i16> %mul, %z0
6822 %add1 = add <2 x i16> %mul, %z1
6823 %shuffle = shufflevector <2 x i16> %add0, <2 x i16> %add1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
6824 ret <4 x i16> %shuffle
; <2 x i16> variant of the "other use" test: the vector product (%x * %y) is
; added to %z and is also stored through the addrspace(3) pointer %ptr.
; Expected output differs per subtarget and instruction selector:
;  - gfx6/gfx7 SelectionDAG: two v_mul_u32_u24 for the stored value plus two
;    v_mad_u32_u24 that repeat the multiplies; GlobalISel: two multiplies
;    reused by two adds and by the packed store value.
;  - gfx8: two 16-bit multiplies (one in SDWA form writing the high word) for
;    the stored value plus two v_mad_u16, with either selector.
;  - gfx9 and gfx10: one v_pk_mul_lo_u16 reused by v_pk_add_u16 and by the
;    ds_write_b32.
6827 define <2 x i16> @other_use_mul_mad_v2i16_var(<2 x i16> %x, <2 x i16> %y, <2 x i16> %z, ptr addrspace(3) %ptr) {
6828 ; GFX67-SDAG-LABEL: other_use_mul_mad_v2i16_var:
6829 ; GFX67-SDAG: ; %bb.0: ; %entry
6830 ; GFX67-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6831 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6832 ; GFX67-SDAG-NEXT: v_and_b32_e32 v2, 0xffff, v2
6833 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6834 ; GFX67-SDAG-NEXT: v_and_b32_e32 v3, 0xffff, v3
6835 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v7, v0, v2
6836 ; GFX67-SDAG-NEXT: v_mul_u32_u24_e32 v8, v1, v3
6837 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v1, v1, v3, v5
6838 ; GFX67-SDAG-NEXT: v_mad_u32_u24 v0, v0, v2, v4
6839 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v8, 16, v8
6840 ; GFX67-SDAG-NEXT: v_and_b32_e32 v7, 0xffff, v7
6841 ; GFX67-SDAG-NEXT: v_lshlrev_b32_e32 v3, 16, v1
6842 ; GFX67-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0
6843 ; GFX67-SDAG-NEXT: v_or_b32_e32 v7, v7, v8
6844 ; GFX67-SDAG-NEXT: v_or_b32_e32 v0, v0, v3
6845 ; GFX67-SDAG-NEXT: s_mov_b32 m0, -1
6846 ; GFX67-SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v1
6847 ; GFX67-SDAG-NEXT: ds_write_b32 v6, v7
6848 ; GFX67-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6849 ; GFX67-SDAG-NEXT: s_setpc_b64 s[30:31]
6851 ; GFX67-GISEL-LABEL: other_use_mul_mad_v2i16_var:
6852 ; GFX67-GISEL: ; %bb.0: ; %entry
6853 ; GFX67-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6854 ; GFX67-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
6855 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
6856 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v0, v0, v2
6857 ; GFX67-GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
6858 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v3
6859 ; GFX67-GISEL-NEXT: v_mul_u32_u24_e32 v1, v1, v2
6860 ; GFX67-GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v1
6861 ; GFX67-GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v0
6862 ; GFX67-GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3
6863 ; GFX67-GISEL-NEXT: v_or_b32_e32 v2, v2, v3
6864 ; GFX67-GISEL-NEXT: v_add_i32_e32 v0, vcc, v0, v4
6865 ; GFX67-GISEL-NEXT: v_add_i32_e32 v1, vcc, v1, v5
6866 ; GFX67-GISEL-NEXT: s_mov_b32 m0, -1
6867 ; GFX67-GISEL-NEXT: ds_write_b32 v6, v2
6868 ; GFX67-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6869 ; GFX67-GISEL-NEXT: s_setpc_b64 s[30:31]
6871 ; GFX8-SDAG-LABEL: other_use_mul_mad_v2i16_var:
6872 ; GFX8-SDAG: ; %bb.0: ; %entry
6873 ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6874 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v4, 16, v1
6875 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v5, 16, v0
6876 ; GFX8-SDAG-NEXT: v_mul_lo_u16_sdwa v6, v5, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6877 ; GFX8-SDAG-NEXT: v_mul_lo_u16_e32 v7, v0, v1
6878 ; GFX8-SDAG-NEXT: v_or_b32_e32 v6, v7, v6
6879 ; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v7, 16, v2
6880 ; GFX8-SDAG-NEXT: v_mad_u16 v4, v5, v4, v7
6881 ; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v4, 16, v4
6882 ; GFX8-SDAG-NEXT: v_mad_u16 v0, v0, v1, v2
6883 ; GFX8-SDAG-NEXT: v_or_b32_e32 v0, v0, v4
6884 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1
6885 ; GFX8-SDAG-NEXT: ds_write_b32 v3, v6
6886 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0)
6887 ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31]
6889 ; GFX8-GISEL-LABEL: other_use_mul_mad_v2i16_var:
6890 ; GFX8-GISEL: ; %bb.0: ; %entry
6891 ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6892 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0
6893 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v5, 16, v1
6894 ; GFX8-GISEL-NEXT: v_mul_lo_u16_e32 v6, v0, v1
6895 ; GFX8-GISEL-NEXT: v_mul_lo_u16_sdwa v7, v4, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
6896 ; GFX8-GISEL-NEXT: v_or_b32_e32 v6, v6, v7
6897 ; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v7, 16, v2
6898 ; GFX8-GISEL-NEXT: v_mad_u16 v0, v0, v1, v2
6899 ; GFX8-GISEL-NEXT: v_mad_u16 v1, v4, v5, v7
6900 ; GFX8-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
6901 ; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
6902 ; GFX8-GISEL-NEXT: s_mov_b32 m0, -1
6903 ; GFX8-GISEL-NEXT: ds_write_b32 v3, v6
6904 ; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
6905 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31]
6907 ; GFX9-LABEL: other_use_mul_mad_v2i16_var:
6908 ; GFX9: ; %bb.0: ; %entry
6909 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6910 ; GFX9-NEXT: v_pk_mul_lo_u16 v1, v0, v1
6911 ; GFX9-NEXT: v_pk_add_u16 v0, v1, v2
6912 ; GFX9-NEXT: ds_write_b32 v3, v1
6913 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
6914 ; GFX9-NEXT: s_setpc_b64 s[30:31]
6916 ; GFX10-LABEL: other_use_mul_mad_v2i16_var:
6917 ; GFX10: ; %bb.0: ; %entry
6918 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
6919 ; GFX10-NEXT: v_pk_mul_lo_u16 v1, v0, v1
6920 ; GFX10-NEXT: v_pk_add_u16 v0, v1, v2
6921 ; GFX10-NEXT: ds_write_b32 v3, v1
6922 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
6923 ; GFX10-NEXT: s_setpc_b64 s[30:31]
6925 %mul = mul <2 x i16> %x, %y
6926 %add0 = add <2 x i16> %mul, %z
6927 store <2 x i16> %mul, ptr addrspace(3) %ptr
6931 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: