1 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1100
2 # RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
3 # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=gcn-dpp-combine -verify-machineinstrs -o - %s | FileCheck %s -check-prefixes=GCN,GFX1150
# VOP3 consumers of a V_MOV_B32_dpp.
# - V_SUBBREV_U32_e64 and V_CVT_PK_U8_F32_e64 are expected to fold into their
#   _e64_dpp forms under every RUN line.
# - The V_MED3_F32_e64 with the literal 12345678 in src2 must stay uncombined
#   under every RUN line.
# - The V_MED3_F32_e64 with the immediate 2 in src1 is expected to fold only
#   under the GFX1150 prefix (used by the gfx1150 and gfx1200 RUN lines); under
#   the GFX1100 prefix it must stay uncombined.
7 # GCN-LABEL: name: vop3
8 # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec
9 # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
10 # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
11 # GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
12 # GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
14 tracksRegLiveness: true
17 liveins: $vgpr0, $vgpr1, $vgpr2
19 %0:vgpr_32 = COPY $vgpr0
20 %1:vgpr_32 = COPY $vgpr1
21 %2:vgpr_32 = COPY $vgpr2
22 %3:vgpr_32 = IMPLICIT_DEF
23 %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
25 %5:sreg_32_xm0_xexec = IMPLICIT_DEF
26 %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64 %4, %1, %5, 1, implicit $exec
28 %8:vgpr_32 = V_CVT_PK_U8_F32_e64 4, %4, 2, %2, 2, %1, 1, implicit $mode, implicit $exec
30 ; should not be combined because src2 literal is illegal
31 %9:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
32 %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit $mode, implicit $exec
34 ; should not be combined on subtargets where src1 imm is illegal
35 %11:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
36 %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec
40 # GCN-LABEL: name: vop3_sgpr_src1
41 # GCN: %6:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %1, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
42 # GFX1100: %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
43 # GFX1150: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
44 # GFX1100: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
45 # GFX1150: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
46 # GFX1100: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
47 # GFX1150: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
48 # GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
50 tracksRegLiveness: true
53 liveins: $vgpr0, $vgpr1, $sgpr0, $sgpr1
55 %0:vgpr_32 = COPY $vgpr0
56 %1:vgpr_32 = COPY $vgpr1
57 %2:sgpr_32 = COPY $sgpr0
58 %3:sgpr_32 = COPY $sgpr1
59 %4:vgpr_32 = IMPLICIT_DEF
61 ; should be combined because src2 allows sgpr
62 %5:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
63 %6:vgpr_32 = V_MED3_F32_e64 0, %5, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
65 ; should be combined only on subtargets that allow sgpr for src1
66 %7:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
67 %8:vgpr_32 = V_MED3_F32_e64 0, %7, 0, %2, 0, %1, 0, 0, implicit $mode, implicit $exec
69 ; should be combined only on subtargets that allow sgpr for src1 (here src1 and src2 are two different sgprs)
70 %9:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
71 %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
73 ; should be combined only on subtargets that allow inlinable constants for src1
74 %11:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
75 %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec
77 ; should not be combined when literal constants are used
78 %13:vgpr_32 = V_MOV_B32_dpp %4, %0, 1, 15, 15, 1, implicit $exec
79 %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec
83 # Regression test for src_modifiers on base u16 opcode
# Three chained V_ADD_NC_U16_fake16_e64 users, each fed by its own dpp mov:
# - modifier values 0/0 and 1/2 are expected to fold into the _e64_dpp form with
#   the modifier values carried over;
# - modifier values 4/8 are expected to stay uncombined
#   (NOTE(review): 4 and 8 are presumably op_sel bits -- confirm).
84 # GCN-LABEL: name: vop3_u16
85 # GCN: %5:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 0, %1, 0, %3, 0, 0, 1, 15, 15, 1, implicit $exec
86 # GCN: %7:vgpr_32 = V_ADD_NC_U16_fake16_e64_dpp %3, 1, %5, 2, %5, 0, 0, 1, 15, 15, 1, implicit $exec
87 # GCN: %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
89 tracksRegLiveness: true
92 liveins: $vgpr0, $vgpr1, $vgpr2
94 %0:vgpr_32 = COPY $vgpr0
95 %1:vgpr_32 = COPY $vgpr1
96 %2:vgpr_32 = COPY $vgpr2
97 %3:vgpr_32 = IMPLICIT_DEF
98 %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
99 %5:vgpr_32 = V_ADD_NC_U16_fake16_e64 0, %4, 0, %3, 0, 0, implicit $exec
100 %6:vgpr_32 = V_MOV_B32_dpp %3, %5, 1, 15, 15, 1, implicit $exec
101 %7:vgpr_32 = V_ADD_NC_U16_fake16_e64 1, %6, 2, %5, 0, 0, implicit $exec
102 %8:vgpr_32 = V_MOV_B32_dpp %3, %7, 1, 15, 15, 1, implicit $exec
103 %9:vgpr_32 = V_ADD_NC_U16_fake16_e64 4, %8, 8, %7, 0, 0, implicit $exec
107 tracksRegLiveness: true
110 liveins: $vgpr0, $vgpr1, $vgpr2
112 ; GCN-LABEL: name: vop3p
113 ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
114 ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
115 ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
116 ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
117 ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
118 ; GCN: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
119 ; GCN: [[V_DOT2_F32_F16_:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp]], 0, [[COPY]], 0, [[COPY2]], 0, 5, 0, 0, 0, implicit $mode, implicit $exec
120 ; GCN: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 15, 1, implicit $exec
121 ; GCN: [[V_DOT2_F32_F16_1:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16 0, [[V_MOV_B32_dpp1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 4, 0, 0, implicit $mode, implicit $exec
122 ; GCN: [[V_DOT2_F32_F16_dpp:%[0-9]+]]:vgpr_32 = V_DOT2_F32_F16_dpp [[DEF]], 10, [[COPY1]], 8, [[COPY]], 9, [[COPY2]], 1, 0, 7, 4, 5, 1, 15, 15, 1, implicit $mode, implicit $exec
123 ; GCN: [[V_FMA_MIX_F32_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIX_F32_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
124 ; GCN: [[V_FMA_MIXLO_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXLO_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 0, [[COPY2]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
125 ; GCN: [[V_FMA_MIXHI_F16_dpp:%[0-9]+]]:vgpr_32 = V_FMA_MIXHI_F16_dpp [[DEF]], 8, [[COPY1]], 8, [[COPY]], 8, [[COPY2]], 1, [[COPY]], 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
126 %0:vgpr_32 = COPY $vgpr0
127 %1:vgpr_32 = COPY $vgpr1
128 %2:vgpr_32 = COPY $vgpr2
129 %3:vgpr_32 = IMPLICIT_DEF
131 ; this should not be combined because op_sel is not zero
132 %4:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
133 %5:vgpr_32 = V_DOT2_F32_F16 0, %4, 0, %0, 0, %2, 0, 5, 0, 0, 0, implicit $mode, implicit $exec
135 ; this should not be combined because op_sel_hi is not all set
136 %6:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
137 %7:vgpr_32 = V_DOT2_F32_F16 0, %6, 0, %0, 0, %2, 0, 0, 4, 0, 0, implicit $mode, implicit $exec
139 %8:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
140 %9:vgpr_32 = V_DOT2_F32_F16 10, %8, 8, %0, 9, %2, 1, 0, 7, 4, 5, implicit $mode, implicit $exec
142 %10:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
143 %11:vgpr_32 = V_FMA_MIX_F32 8, %10, 8, %0, 8, %2, 1, 0, 7, implicit $mode, implicit $exec
145 %12:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
146 %13:vgpr_32 = V_FMA_MIXLO_F16 8, %12, 8, %0, 8, %2, 0, %2, 0, 7, implicit $mode, implicit $exec
148 %14:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 15, 15, 1, implicit $exec
149 %15:vgpr_32 = V_FMA_MIXHI_F16 8, %14, 8, %0, 8, %2, 1, %0, 0, 7, implicit $mode, implicit $exec
# V_FMAC_F32_e64 fed by a dpp mov in src0 is expected to fold into
# V_FMAC_F32_e64_dpp. The per-source modifier values (2, 2, 2) and the two
# trailing immediates (1, 2) are carried over unchanged, followed by the mov's
# dpp controls (1, 15, 15, 1).
# NOTE(review): the input names the result %6 while the check expects %5;
# presumably virtual registers are renumbered densely when the MIR is parsed --
# confirm.
153 # GCN-LABEL: name: fmac_e64
154 # GCN: %5:vgpr_32 = V_FMAC_F32_e64_dpp %3, 2, %0, 2, %1, 2, %2, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
156 tracksRegLiveness: true
159 liveins: $vgpr0, $vgpr1, $vgpr2
161 %0:vgpr_32 = COPY $vgpr0
162 %1:vgpr_32 = COPY $vgpr1
163 %2:vgpr_32 = COPY $vgpr2
164 %3:vgpr_32 = IMPLICIT_DEF
165 %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
166 %6:vgpr_32 = V_FMAC_F32_e64 2, %4, 2, %1, 2, %2, 1, 2, implicit $mode, implicit $exec
169 # when the DPP source isn't a src0 operand the operation should be commuted if possible
170 # GCN-LABEL: name: dpp_commute_shrink
171 # GCN: %4:vgpr_32 = V_MUL_U32_U24_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
172 # GCN: %7:vgpr_32 = V_AND_B32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
173 # GCN: %10:vgpr_32 = V_MAX_I32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
174 # GCN: %13:vgpr_32 = V_MIN_I32_dpp %1, %0, %1, 1, 15, 14, 0, implicit $exec
175 # GCN: %16:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
176 name: dpp_commute_shrink
177 tracksRegLiveness: true
180 liveins: $vgpr0, $vgpr1
182 %0:vgpr_32 = COPY $vgpr0
183 %1:vgpr_32 = COPY $vgpr1
185 %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
186 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
187 %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 0, implicit $exec
189 %5:vgpr_32 = V_MOV_B32_e32 4294967295, implicit $exec
190 %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 14, 0, implicit $exec
191 %7:vgpr_32 = V_AND_B32_e64 %1, %6, implicit $exec
193 %8:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec
194 %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
195 %10:vgpr_32 = V_MAX_I32_e64 %1, %9, implicit $exec
197 %11:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
198 %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 15, 14, 0, implicit $exec
199 %13:vgpr_32 = V_MIN_I32_e64 %1, %12, implicit $exec
201 %14:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
202 %15:vgpr_32 = V_MOV_B32_dpp %14, %0, 1, 14, 15, 0, implicit $exec
203 %16:vgpr_32 = V_SUB_U32_e64 %1, %15, 0, implicit $exec
207 # do not combine, dpp arg used twice
208 # GCN-LABEL: name: dpp_arg_twice
209 # GCN: %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
210 # GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
211 # GCN: %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
213 tracksRegLiveness: true
216 liveins: $vgpr0, $vgpr1
218 %0:vgpr_32 = COPY $vgpr0
219 %1:vgpr_32 = COPY $vgpr1
220 %2:vgpr_32 = IMPLICIT_DEF
222 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
223 %4:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %3, 2, %3, 1, 2, implicit $mode, implicit $exec
225 %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
226 %6:vgpr_32 = V_FMA_F32_e64 2, %5, 2, %1, 2, %5, 1, 2, implicit $mode, implicit $exec
228 %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
229 %8:vgpr_32 = V_FMA_F32_e64 2, %7, 2, %7, 2, %1, 1, 2, implicit $mode, implicit $exec
233 # when the DPP source isn't the src0 operand, the operation should be commuted if possible
234 # GCN-LABEL: name: dpp_commute_e64
235 # GCN: %4:vgpr_32 = V_MUL_U32_U24_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
236 # GCN: %7:vgpr_32 = V_FMA_F32_e64_dpp %5, 2, %0, 1, %1, 2, %1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
237 # GCN: %10:vgpr_32 = V_SUBREV_U32_e64_dpp %1, %0, %1, 1, 1, 14, 15, 0, implicit $exec
238 # GCN: %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64_dpp %1, %0, %1, 0, 1, 14, 15, 0, implicit $exec
239 # GCN: %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
240 name: dpp_commute_e64
241 tracksRegLiveness: true
244 liveins: $vgpr0, $vgpr1
246 %0:vgpr_32 = COPY $vgpr0
247 %1:vgpr_32 = COPY $vgpr1
249 %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
250 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
251 %4:vgpr_32 = V_MUL_U32_U24_e64 %1, %3, 1, implicit $exec
253 %5:vgpr_32 = IMPLICIT_DEF
254 %6:vgpr_32 = V_MOV_B32_dpp %5, %0, 1, 15, 15, 1, implicit $exec
255 %7:vgpr_32 = V_FMA_F32_e64 1, %1, 2, %6, 2, %1, 1, 2, implicit $mode, implicit $exec
257 %8:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
258 %9:vgpr_32 = V_MOV_B32_dpp %8, %0, 1, 14, 15, 0, implicit $exec
259 %10:vgpr_32 = V_SUB_U32_e64 %1, %9, 1, implicit $exec
261 %11:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
262 %12:vgpr_32 = V_MOV_B32_dpp %11, %0, 1, 14, 15, 0, implicit $exec
263 %13:vgpr_32, %14:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 %1, %12, 0, implicit $exec
265 ; this cannot be combined: the dpp value is in src1 and cannot be commuted into src0 because src0 holds an immediate
266 %15:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
267 %16:vgpr_32 = V_MOV_B32_dpp %15, %0, 1, 14, 15, 0, implicit $exec
268 %17:vgpr_32, %18:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 5, %16, 0, implicit $exec
273 # check for floating point modifiers
274 # GCN-LABEL: name: add_f32_e64
275 # GCN: %4:vgpr_32 = V_ADD_F32_e64_dpp %2, 0, %1, 0, %0, 0, 1, 1, 15, 15, 1, implicit $mode, implicit $exec
276 # GCN: %6:vgpr_32 = V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
277 # GCN: %8:vgpr_32 = V_ADD_F32_dpp %2, 1, %1, 2, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
278 # GCN: %10:vgpr_32 = V_ADD_F32_e64_dpp %2, 4, %1, 8, %0, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec
281 tracksRegLiveness: true
284 liveins: $vgpr0, $vgpr1
286 %0:vgpr_32 = COPY $vgpr0
287 %1:vgpr_32 = COPY $vgpr1
288 %2:vgpr_32 = IMPLICIT_DEF
290 ; this should be combined as e64
291 %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
292 %4:vgpr_32 = V_ADD_F32_e64 0, %3, 0, %0, 0, 1, implicit $mode, implicit $exec
294 ; this should be combined and shrunk as all modifiers are default
295 %5:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
296 %6:vgpr_32 = V_ADD_F32_e64 0, %5, 0, %0, 0, 0, implicit $mode, implicit $exec
298 ; this should be combined and shrunk as modifiers other than abs|neg are default
299 %7:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
300 %8:vgpr_32 = V_ADD_F32_e64 1, %7, 2, %0, 0, 0, implicit $mode, implicit $exec
302 ; this should be combined as e64
303 %9:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
304 %10:vgpr_32 = V_ADD_F32_e64 4, %9, 8, %0, 0, 0, implicit $mode, implicit $exec
307 # check for e64 modifiers
308 # GCN-LABEL: name: add_u32_e64
309 # GCN: %4:vgpr_32 = V_ADD_U32_dpp %2, %0, %1, 1, 15, 15, 1, implicit $exec
310 # GCN: %6:vgpr_32 = V_ADD_U32_e64_dpp %2, %0, %1, 1, 1, 15, 15, 1, implicit $exec
313 tracksRegLiveness: true
316 liveins: $vgpr0, $vgpr1
318 %0:vgpr_32 = COPY $vgpr0
319 %1:vgpr_32 = COPY $vgpr1
320 %2:vgpr_32 = IMPLICIT_DEF
322 ; this should be combined and shrunk as all modifiers are default
323 %3:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
324 %4:vgpr_32 = V_ADD_U32_e64 %3, %1, 0, implicit $exec
326 ; this should be combined as _e64
327 %5:vgpr_32 = V_MOV_B32_dpp undef %2, %0, 1, 15, 15, 1, implicit $exec
328 %6:vgpr_32 = V_ADD_U32_e64 %5, %1, 1, implicit $exec
331 # tests on sequences of dpp consumers
332 # GCN-LABEL: name: dpp_seq
333 # GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
334 # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
335 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
337 # GCN: %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
340 tracksRegLiveness: true
343 liveins: $vgpr0, $vgpr1
344 %0:vgpr_32 = COPY $vgpr0
345 %1:vgpr_32 = COPY $vgpr1
346 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
348 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
349 %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
350 %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
351 %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
353 %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
354 %8:vgpr_32 = V_ADD_U32_e32 %7, %1, implicit $exec
355 ; this breaks the sequence
356 %9:vgpr_32 = V_SUB_U32_e32 5, %7, implicit $exec
359 # tests on sequences of dpp consumers followed by control flow
# %3 (dpp mov with old = V_MOV_B32_e32 0) has three users, after which the
# block ends in V_CMP_EQ_U32_e64 / SI_IF. All three users are still expected to
# fold into dpp forms with %1 as old; the V_SUB_U32_e32, whose dpp value sits
# in src1, is expected to become V_SUBREV_U32_dpp.
360 # GCN-LABEL: name: dpp_seq_cf
361 # GCN: %4:vgpr_32 = V_ADD_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
362 # GCN: %5:vgpr_32 = V_SUBREV_U32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
363 # GCN: %6:vgpr_32 = V_OR_B32_dpp %1, %0, %1, 1, 14, 15, 0, implicit $exec
366 tracksRegLiveness: true
369 successors: %bb.1, %bb.2
370 liveins: $vgpr0, $vgpr1
371 %0:vgpr_32 = COPY $vgpr0
372 %1:vgpr_32 = COPY $vgpr1
373 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
375 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 14, 15, 0, implicit $exec
376 %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
377 %5:vgpr_32 = V_SUB_U32_e32 %1, %3, implicit $exec
378 %6:vgpr_32 = V_OR_B32_e32 %3, %1, implicit $exec
380 %7:sreg_32 = V_CMP_EQ_U32_e64 %5, %6, implicit $exec
381 %8:sreg_32 = SI_IF %7, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
388 SI_END_CF %8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
# The dpp mov's old operand is %2 = V_MOV_B32_e32 0; per the test name its def
# sits in a different basic block than the mov (NOTE(review): confirm against
# the block labels). The add is still expected to fold: the resulting
# V_ADD_U32_dpp uses %0 (the add's other source) as old and keeps the mov's
# dpp controls 1, 1, 1, 0.
391 # GCN-LABEL: name: old_in_diff_bb
392 # GCN: %4:vgpr_32 = V_ADD_U32_dpp %0, %1, %0, 1, 1, 1, 0, implicit $exec
395 tracksRegLiveness: true
399 liveins: $vgpr0, $vgpr1
401 %0:vgpr_32 = COPY $vgpr0
402 %1:vgpr_32 = COPY $vgpr1
403 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
407 %3:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 1, 1, 0, implicit $exec
408 %4:vgpr_32 = V_ADD_U32_e32 %3, %0, implicit $exec
411 # old reg def is in diff BB but bound_ctrl:1 - can combine
# The mov's dpp controls are 1, 15, 15, 1 (the last operand is the one the
# comment above refers to as bound_ctrl). The check does not pin the old
# operand: the pattern {{%[0-9]}} accepts any single-digit virtual register.
412 # GCN-LABEL: name: old_in_diff_bb_bctrl_zero
413 # GCN: %4:vgpr_32 = V_ADD_U32_dpp {{%[0-9]}}, %0, %1, 1, 15, 15, 1, implicit $exec
415 name: old_in_diff_bb_bctrl_zero
416 tracksRegLiveness: true
420 liveins: $vgpr0, $vgpr1
422 %0:vgpr_32 = COPY $vgpr0
423 %1:vgpr_32 = COPY $vgpr1
424 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
428 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
429 %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
432 # EXEC mask changed between def and use - cannot combine
# %3 has two users (%4 and %6). Between them, the COPY defining %5 carries
# implicit-def $exec, so the second user sees a redefined EXEC. The check only
# requires that the V_MOV_B32_dpp defining %3 is still present in the output.
433 # GCN-LABEL: name: exec_changed
434 # GCN: %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
437 tracksRegLiveness: true
440 liveins: $vgpr0, $vgpr1
442 %0:vgpr_32 = COPY $vgpr0
443 %1:vgpr_32 = COPY $vgpr1
444 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
445 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
446 %4:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
447 %5:sreg_64 = COPY $exec, implicit-def $exec
448 %6:vgpr_32 = V_ADD_U32_e32 %3, %1, implicit $exec
451 # test if $old definition is correctly tracked through subreg manipulation pseudos
# Here old is %5.sub0. The INSERT_SUBREG only replaces sub1, so sub0 comes from
# %4, whose sub0 is %2 = V_MOV_B32_e32 1. The multiply is expected to fold with
# %0.sub1 (its other source) as old.
# NOTE(review): presumably the fold is allowed because 1 is the identity for
# the multiply -- confirm.
453 # GCN-LABEL: name: mul_old_subreg
454 # GCN: %7:vgpr_32 = V_MUL_I32_I24_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
457 tracksRegLiveness: true
460 liveins: $vgpr0, $vgpr1
462 %0:vreg_64 = COPY $vgpr0
463 %1:vgpr_32 = COPY $vgpr1
464 %2:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
465 %3:vgpr_32 = V_MOV_B32_e32 42, implicit $exec
466 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
467 %5:vreg_64 = INSERT_SUBREG %4, %1, %subreg.sub1 ; %5.sub0 is taken from %4
468 %6:vgpr_32 = V_MOV_B32_dpp %5.sub0, %1, 1, 1, 1, 0, implicit $exec
469 %7:vgpr_32 = V_MUL_I32_I24_e32 %6, %0.sub1, implicit $exec
# old is %3.sub1, i.e. the lane written by the INSERT_SUBREG from
# %2 = V_MOV_B32_e32 0. The add is expected to fold with %0.sub1 (its other
# source) as old, keeping the mov's dpp controls 1, 1, 1, 0.
472 # GCN-LABEL: name: add_old_subreg
473 # GCN: %5:vgpr_32 = V_ADD_U32_dpp %0.sub1, %1, %0.sub1, 1, 1, 1, 0, implicit $exec
476 tracksRegLiveness: true
479 liveins: $vgpr0, $vgpr1
481 %0:vreg_64 = COPY $vgpr0
482 %1:vgpr_32 = COPY $vgpr1
483 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
484 %3:vreg_64 = INSERT_SUBREG %0, %2, %subreg.sub1 ; %3.sub1 is inserted
485 %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 1, 1, 0, implicit $exec
486 %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
# old is %3.sub1, a lane the REG_SEQUENCE never defines (it only supplies
# sub0). The add is expected to fold, with old printed as "undef %3.sub1" in
# the resulting V_ADD_U32_dpp.
489 # GCN-LABEL: name: add_old_subreg_undef
490 # GCN: %5:vgpr_32 = V_ADD_U32_dpp undef %3.sub1, %1, %0.sub1, 1, 15, 15, 1, implicit $exec
492 name: add_old_subreg_undef
493 tracksRegLiveness: true
496 liveins: $vgpr0, $vgpr1
498 %0:vreg_64 = COPY $vgpr0
499 %1:vgpr_32 = COPY $vgpr1
500 %2:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
501 %3:vreg_64 = REG_SEQUENCE %2, %subreg.sub0 ; %3.sub1 is undef
502 %4:vgpr_32 = V_MOV_B32_dpp %3.sub1, %1, 1, 15, 15, 1, implicit $exec
503 %5:vgpr_32 = V_ADD_U32_e32 %4, %0.sub1, implicit $exec
506 # Test instruction which does not have modifiers in VOP1 form but does in DPP form.
# V_CEIL_F32_e32 takes only the source register, whereas the expected
# V_CEIL_F32_dpp carries an explicit 0 modifier operand ahead of its source.
507 # GCN-LABEL: name: dpp_vop1
508 # GCN: %3:vgpr_32 = V_CEIL_F32_dpp %0, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
510 tracksRegLiveness: true
513 %1:vgpr_32 = IMPLICIT_DEF
514 %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
515 %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
518 # Test instruction which does not have modifiers in VOP2 form but does in DPP form.
# V_MIN_F32_e32 takes two bare sources, whereas the expected V_MIN_F32_dpp
# carries an explicit 0 modifier operand ahead of each source.
519 # GCN-LABEL: name: dpp_min
520 # GCN: %3:vgpr_32 = V_MIN_F32_dpp %0, 0, undef %2:vgpr_32, 0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
522 tracksRegLiveness: true
525 %1:vgpr_32 = IMPLICIT_DEF
526 %2:vgpr_32 = V_MOV_B32_dpp %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
527 %4:vgpr_32 = V_MIN_F32_e32 %2, undef %3:vgpr_32, implicit $mode, implicit $exec
530 # Test an undef old operand
# Both the old and the source operand of the dpp mov are undef; the combined
# V_CEIL_F32_dpp is expected to keep an undef old operand.
531 # GCN-LABEL: name: dpp_undef_old
532 # GCN: %3:vgpr_32 = V_CEIL_F32_dpp undef %1:vgpr_32, 0, undef %2:vgpr_32, 1, 15, 15, 1, implicit $mode, implicit $exec
534 tracksRegLiveness: true
537 %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
538 %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $mode, implicit $exec
541 # Do not combine a dpp mov which writes a physreg.
# The mov defines $vgpr0 directly; both instructions are expected to come out
# unchanged apart from the virtual register numbers.
542 # GCN-LABEL: name: phys_dpp_mov_dst
543 # GCN: $vgpr0 = V_MOV_B32_dpp undef %0:vgpr_32, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
544 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
545 name: phys_dpp_mov_dst
546 tracksRegLiveness: true
549 $vgpr0 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
550 %2:vgpr_32 = V_CEIL_F32_e32 $vgpr0, implicit $mode, implicit $exec
553 # Do not combine a dpp mov which reads a physreg as its old operand.
# Here $vgpr0 is the first (old) operand of the mov; both instructions are
# expected to come out unchanged apart from the virtual register numbers.
554 # GCN-LABEL: name: phys_dpp_mov_old_src
555 # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %1:vgpr_32, 1, 15, 15, 1, implicit $exec
556 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
557 name: phys_dpp_mov_old_src
558 tracksRegLiveness: true
561 %1:vgpr_32 = V_MOV_B32_dpp undef $vgpr0, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
562 %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
565 # Do not combine a dpp mov which reads a physreg as its source operand.
# Here $vgpr0 is the second (source) operand of the mov; both instructions are
# expected to come out unchanged apart from the virtual register numbers.
566 # GCN-LABEL: name: phys_dpp_mov_src
567 # GCN: %0:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
568 # GCN: %2:vgpr_32 = V_CEIL_F32_e32 %0, implicit $mode, implicit $exec
569 name: phys_dpp_mov_src
570 tracksRegLiveness: true
573 %1:vgpr_32 = V_MOV_B32_dpp undef %0:vgpr_32, undef $vgpr0, 1, 15, 15, 1, implicit $exec
574 %2:vgpr_32 = V_CEIL_F32_e32 %1, implicit $mode, implicit $exec
# Two dpp movs (both with controls 1, 15, 15, 1) are joined by a REG_SEQUENCE
# and consumed lane by lane by an add / add-with-carry pair. Both users are
# expected to fold into dpp forms that read %1.sub0 / %1.sub1 directly, each
# with a newly created IMPLICIT_DEF (%9, %8) as old.
577 # GCN-LABEL: name: dpp_reg_sequence_both_combined
578 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
579 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
580 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
581 # GCN: %9:vgpr_32 = IMPLICIT_DEF
582 # GCN: %8:vgpr_32 = IMPLICIT_DEF
583 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
584 # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
585 name: dpp_reg_sequence_both_combined
586 tracksRegLiveness: true
589 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
591 %0:vreg_64 = COPY $vgpr0_vgpr1
592 %1:vreg_64 = COPY $vgpr2_vgpr3
593 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
594 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
595 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
596 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
597 %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
598 %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
# Same shape as a REG_SEQUENCE of two dpp movs feeding an add / add-with-carry
# pair, but only the sub0 mov uses controls 1, 15, 15, 1; the sub1 mov uses
# 1, 1, 1, 1. Expected: the sub0 user folds into V_ADD_U32_dpp (old is a new
# IMPLICIT_DEF) and the REG_SEQUENCE's sub0 input becomes undef, while the sub1
# mov and the V_ADDC_U32_e32 stay as they are.
601 # GCN-LABEL: name: dpp_reg_sequence_first_combined
602 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
603 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
604 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
605 # GCN: %8:vgpr_32 = IMPLICIT_DEF
606 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
607 # GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
608 # GCN: %6:vgpr_32 = V_ADD_U32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit $exec
609 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
610 name: dpp_reg_sequence_first_combined
611 tracksRegLiveness: true
614 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
616 %0:vreg_64 = COPY $vgpr0_vgpr1
617 %1:vreg_64 = COPY $vgpr2_vgpr3
618 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
619 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
620 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
621 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
622 %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
623 %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
# A REG_SEQUENCE of two dpp movs feeding an add / add-with-carry pair, where
# only the sub1 mov uses controls 1, 15, 15, 1; the sub0 mov uses 1, 1, 1, 1.
# Expected: the sub1 user folds into V_ADDC_U32_dpp (old is a new IMPLICIT_DEF)
# and the REG_SEQUENCE's sub1 input becomes undef, while the sub0 mov and the
# V_ADD_U32_e32 stay as they are.
626 # GCN-LABEL: name: dpp_reg_sequence_second_combined
627 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
628 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
629 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
630 # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
631 # GCN: %8:vgpr_32 = IMPLICIT_DEF
632 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
633 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
634 # GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
635 name: dpp_reg_sequence_second_combined
636 tracksRegLiveness: true
639 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
641 %0:vreg_64 = COPY $vgpr0_vgpr1
642 %1:vreg_64 = COPY $vgpr2_vgpr3
643 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
644 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
645 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
646 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
647 %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
648 %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
# A REG_SEQUENCE of two dpp movs feeding an add / add-with-carry pair, where
# both movs use controls 1, 1, 1, 1. Nothing is expected to fold: the movs, the
# REG_SEQUENCE and both e32 users remain. The only differences in the expected
# output are the virtual register numbers and the carry register being spelled
# $vcc_lo.
651 # GCN-LABEL: name: dpp_reg_sequence_none_combined
652 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
653 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
654 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
655 # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
656 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
657 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
658 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
659 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
660 name: dpp_reg_sequence_none_combined
661 tracksRegLiveness: true
664 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
666 %0:vreg_64 = COPY $vgpr0_vgpr1
667 %1:vreg_64 = COPY $vgpr2_vgpr3
668 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
669 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
670 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
671 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
672 %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
673 %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
676 # GCN-LABEL: name: dpp_reg_sequence_exec_changed
677 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
678 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
679 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
680 # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
681 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
682 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
683 # GCN: S_BRANCH %bb.1
685 # GCN: %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %2, implicit $exec
686 # GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
687 name: dpp_reg_sequence_exec_changed
688 tracksRegLiveness: true
691 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
693 %0:vreg_64 = COPY $vgpr0_vgpr1
694 %1:vreg_64 = COPY $vgpr2_vgpr3
695 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
696 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
697 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
698 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
703 %6:vgpr_32 = V_ADD_U32_e32 %4.sub0, %5, implicit $exec
704 %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
707 # GCN-LABEL: name: dpp_reg_sequence_subreg
708 # GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
709 # GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
710 # GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
711 # GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
712 # GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
713 # GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
714 # GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
715 # GCN: %7:vgpr_32 = V_ADD_U32_e32 %6.sub0, %2, implicit $exec
716 # GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
717 name: dpp_reg_sequence_subreg
718 tracksRegLiveness: true
721 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
723 %0:vreg_64 = COPY $vgpr0_vgpr1
724 %1:vreg_64 = COPY $vgpr2_vgpr3
725 %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
726 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
727 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
728 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
729 %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
730 %6:vgpr_32 = V_ADD_U32_e32 %5.sub0, %8, implicit $exec
731 %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
734 # GCN-LABEL: name: dpp_reg_sequence_src2_reject
735 #GCN: %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
736 #GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
737 #GCN: %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
738 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
739 #GCN: %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
740 #GCN: %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
741 name: dpp_reg_sequence_src2_reject
742 tracksRegLiveness: true
745 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
747 %0:vreg_64 = COPY $vgpr0_vgpr1
748 %1:vreg_64 = COPY $vgpr2_vgpr3
749 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
750 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
751 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
752 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
753 ; use of dpp arg as src2, reject
754 %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub0, 1, 2, implicit $mode, implicit $exec
755 ; cannot commute src0 and src2, and %4.sub0 already rejected, reject
756 %7:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
# One V_FMA_F32_e64 reads both lanes of a REG_SEQUENCE of dpp movs: %4.sub0 in
# src0 and %4.sub1 in src2. Expected: only the sub0 mov is folded, into src0 of
# V_FMA_F32_e64_dpp. The sub1 mov stays and still feeds src2 through the
# REG_SEQUENCE, whose sub0 input becomes undef.
759 # GCN-LABEL: name: dpp_reg_sequence_src2
760 #GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
761 #GCN: %4:vreg_64 = REG_SEQUENCE undef %2:vgpr_32, %subreg.sub0, %3, %subreg.sub1
762 #GCN: %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
763 #GCN: %6:vgpr_32 = V_FMA_F32_e64_dpp %8, 2, %1.sub0, 2, %5, 2, %4.sub1, 1, 2, 1, 15, 15, 1, implicit $mode, implicit $exec
764 name: dpp_reg_sequence_src2
765 tracksRegLiveness: true
768 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
770 %0:vreg_64 = COPY $vgpr0_vgpr1
771 %1:vreg_64 = COPY $vgpr2_vgpr3
772 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
773 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
774 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
775 %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
776 %6:vgpr_32 = V_FMA_F32_e64 2, %4.sub0, 2, %5, 2, %4.sub1, 1, 2, implicit $mode, implicit $exec
# A V_MOV_B64_DPP_PSEUDO whose operands are IMPLICIT_DEF values feeds a 32-bit
# add and add-with-carry through its sub0 and sub1 lanes; each user is expected
# to be rewritten to the matching _dpp opcode.
779 # GCN-LABEL: name: dpp64_add64_impdef
780 # GCN: %3:vgpr_32 = V_ADD_U32_dpp %1.sub0, %0.sub0, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
781 # GCN: %5:vgpr_32 = V_ADDC_U32_dpp %1.sub1, %0.sub1, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
782 name: dpp64_add64_impdef
783 tracksRegLiveness: true
787 %0:vreg_64 = IMPLICIT_DEF
788 %1:vreg_64 = IMPLICIT_DEF
789 %2:vreg_64 = V_MOV_B64_DPP_PSEUDO %1:vreg_64, %0:vreg_64, 1, 15, 15, 1, implicit $exec
790 %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
791 %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
# Here the V_MOV_B64_DPP_PSEUDO operands are undef uses with no defining
# instruction; the expected _dpp instructions keep the undef flag on the
# per-lane (sub0 / sub1) operands.
794 # GCN-LABEL: name: dpp64_add64_undef
795 # GCN: %3:vgpr_32 = V_ADD_U32_dpp undef %1.sub0:vreg_64, undef %2.sub0:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit $exec
796 # GCN: %5:vgpr_32 = V_ADDC_U32_dpp undef %1.sub1:vreg_64, undef %2.sub1:vreg_64, undef %4:vgpr_32, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
797 name: dpp64_add64_undef
798 tracksRegLiveness: true
802 %2:vreg_64 = V_MOV_B64_DPP_PSEUDO undef %1:vreg_64, undef %0:vreg_64, 1, 15, 15, 1, implicit $exec
803 %5:vgpr_32 = V_ADD_U32_e32 %2.sub0, undef %4:vgpr_32, implicit $exec
804 %6:vgpr_32 = V_ADDC_U32_e32 %2.sub1, undef %4, implicit-def $vcc, implicit $vcc, implicit $exec
# Two V_CNDMASK_B32_e64 instructions, each taking a DPP mov result as src0 and
# an SGPR as the last operand. The first (%5) is expected to stay as
# V_CNDMASK_B32_e64; the second (%8) is expected to become
# V_CNDMASK_B32_e64_dpp.
808 # GCN-LABEL: name: cndmask_with_src2
809 # GCN: %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
810 # GCN: %8:vgpr_32 = V_CNDMASK_B32_e64_dpp %2, 4, %1, 0, %1, %7, 1, 15, 15, 1, implicit $exec
811 name: cndmask_with_src2
812 tracksRegLiveness: true
815 liveins: $vgpr0, $vgpr1
816 %0:vgpr_32 = COPY $vgpr0
817 %1:vgpr_32 = COPY $vgpr1
818 %2:vgpr_32 = IMPLICIT_DEF
820 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
821 %4:sreg_32_xm0_xexec = IMPLICIT_DEF
822 %5:vgpr_32 = V_CNDMASK_B32_e64 0, %3, 0, %1, %4, implicit $exec
824 ; src2 is legal for _e64
825 %6:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 15, 1, implicit $exec
826 %7:sreg_32_xm0_xexec = IMPLICIT_DEF
827 %8:vgpr_32 = V_CNDMASK_B32_e64 4, %6, 0, %1, %7, implicit $exec
832 # Make sure flags aren't dropped
# The nnan and nofpexcept flags on the V_ADD_F32_e64 input are expected to be
# present on the resulting V_ADD_F32_dpp.
833 # GCN-LABEL: name: flags_add_f32_e64
834 # GCN: %4:vgpr_32 = nnan nofpexcept V_ADD_F32_dpp %2, 0, %1, 0, %0, 1, 15, 15, 1, implicit $mode, implicit $exec
835 name: flags_add_f32_e64
836 tracksRegLiveness: true
839 liveins: $vgpr0, $vgpr1
841 %0:vgpr_32 = COPY $vgpr0
842 %1:vgpr_32 = COPY $vgpr1
843 %2:vgpr_32 = IMPLICIT_DEF
845 %3:vgpr_32 = V_MOV_B32_dpp undef %2, %1, 1, 15, 15, 1, implicit $exec
846 %4:vgpr_32 = nofpexcept nnan V_ADD_F32_e64 0, %3, 0, %0, 0, 0, implicit $mode, implicit $exec
847 S_ENDPGM 0, implicit %4
# The DPP mov result %2 is used as both src0 and src1 of V_MAX_F32_e64; the
# instruction is expected to be left uncombined.
851 # GCN-LABEL: name: dont_combine_more_than_one_operand
852 # GCN: %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
853 name: dont_combine_more_than_one_operand
854 tracksRegLiveness: true
857 liveins: $vgpr0, $vgpr1
858 %0:vgpr_32 = COPY $vgpr0
859 %1:vgpr_32 = COPY $vgpr1
860 %2:vgpr_32 = V_MOV_B32_dpp %0, %1, 1, 15, 15, 1, implicit $exec
861 %3:vgpr_32 = V_MAX_F32_e64 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
# Each add reads the same REG_SEQUENCE lane (built from a DPP mov) as both of
# its operands, so neither add is expected to be combined. Because the adds are
# printed back as written, the carry register in the input must be the same
# $vcc_lo that the liveins declare and the directives below match.
864 # GCN-LABEL: name: dont_combine_more_than_one_operand_dpp_reg_sequence
865 # GCN: %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
866 # GCN: %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
867 name: dont_combine_more_than_one_operand_dpp_reg_sequence
868 tracksRegLiveness: true
871 liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vcc_lo
872 %0:vreg_64 = COPY $vgpr0_vgpr1
873 %1:vreg_64 = COPY $vgpr2_vgpr3
874 %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
875 %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
876 %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
877 %5:vgpr_32 = V_ADD_U32_e32 %4.sub0, %4.sub0, implicit $exec
878 %6:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %4.sub1, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
881 # Check op_sel is all 0s when combining
# Five V_ADD_I16_fake16_e64 cases: only the first, whose modifier immediates
# are all 0, is expected to become V_ADD_I16_fake16_e64_dpp; the four with a
# nonzero modifier immediate (4 or 8) are expected to be left unchanged.
882 # GCN-LABEL: name: opsel_vop3
883 # GCN: %4:vgpr_32 = V_ADD_I16_fake16_e64_dpp %2, 0, %0, 0, %1, 0, 0, 1, 15, 15, 1, implicit $exec
884 # GCN: %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
885 # GCN: %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
886 # GCN: %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
887 # GCN: %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
889 tracksRegLiveness: true
892 liveins: $vgpr0, $vgpr1
894 %0:vgpr_32 = COPY $vgpr0
895 %1:vgpr_32 = COPY $vgpr1
896 %2:vgpr_32 = IMPLICIT_DEF
898 ; Combine for op_sel:[0,0,0]
899 %3:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
900 %4:vgpr_32 = V_ADD_I16_fake16_e64 0, %3, 0, %1, 0, 0, implicit $exec
902 ; Do not combine for op_sel:[1,0,0]
903 %5:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
904 %6:vgpr_32 = V_ADD_I16_fake16_e64 4, %5, 0, %1, 0, 0, implicit $exec
906 ; Do not combine for op_sel:[0,1,0]
907 %7:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
908 %8:vgpr_32 = V_ADD_I16_fake16_e64 0, %7, 4, %1, 0, 0, implicit $exec
910 ; Do not combine for op_sel:[1,1,0]
911 %9:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
912 %10:vgpr_32 = V_ADD_I16_fake16_e64 4, %9, 4, %1, 0, 0, implicit $exec
914 ; Do not combine for op_sel:[0,0,1] (dst_op_sel only)
915 %11:vgpr_32 = V_MOV_B32_dpp %2, %0, 1, 15, 15, 1, implicit $exec
916 %12:vgpr_32 = V_ADD_I16_fake16_e64 8, %11, 0, %1, 0, 0, implicit $exec
919 # Check op_sel is all 0s and op_sel_hi is all 1s when combining
# Four V_FMA_MIX_F32 cases with per-source modifier immediates 0, 4, 8 and 12:
# only the case with all modifiers equal to 8 is expected to become
# V_FMA_MIX_F32_dpp; the other three are expected to be left unchanged.
920 # GCN-LABEL: name: opsel_vop3p
921 # GCN: %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
922 # GCN: %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
923 # GCN: %9:vgpr_32 = V_FMA_MIX_F32_dpp %3, 8, %0, 8, %1, 8, %2, 0, 0, 7, 1, 15, 15, 1, implicit $mode, implicit $exec
924 # GCN: %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec
927 tracksRegLiveness: true
930 liveins: $vgpr0, $vgpr1, $vgpr2
932 %0:vgpr_32 = COPY $vgpr0
933 %1:vgpr_32 = COPY $vgpr1
934 %2:vgpr_32 = COPY $vgpr2
935 %3:vgpr_32 = IMPLICIT_DEF
937 ; Do not combine for op_sel:[0,0,0] op_sel_hi:[0,0,0]
938 %4:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
939 %5:vgpr_32 = V_FMA_MIX_F32 0, %4, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
941 ; Do not combine for op_sel:[1,1,1] op_sel_hi:[0,0,0]
942 %6:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
943 %7:vgpr_32 = V_FMA_MIX_F32 4, %6, 4, %1, 4, %2, 0, 0, 0, implicit $mode, implicit $exec
945 ; Combine for op_sel:[0,0,0] op_sel_hi:[1,1,1]
946 %8:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
947 %9:vgpr_32 = V_FMA_MIX_F32 8, %8, 8, %1, 8, %2, 0, 0, 0, implicit $mode, implicit $exec
949 ; Do not combine for op_sel:[1,1,1] op_sel_hi:[1,1,1]
950 %10:vgpr_32 = V_MOV_B32_dpp %3, %0, 1, 15, 15, 1, implicit $exec
951 %11:vgpr_32 = V_FMA_MIX_F32 12, %10, 12, %1, 12, %2, 0, 0, 0, implicit $mode, implicit $exec