# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s

# Test that we fold the correct element from G_UNMERGE_VALUES into fma.
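#
# An illustrative sketch of the fold (not tool output; %x, %y, %mul and %sum
# are placeholder names). The combiner turns
#
#   %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
#   %mul:_(s32) = G_FMUL %x, %y
#   %sum:_(s32) = G_FADD %mul, %el1
#
# into a single fused multiply-add that reads the unmerged element directly:
#
#   %sum:_(s32) = G_FMA %x, %y, %el1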
---
name: test_f32_add_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %6, %el1
    $vgpr0 = COPY %7(s32)
...
---
name: test_f32_add_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %el1, %6
    $vgpr0 = COPY %7(s32)
...
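# Mixed-precision variants: an f16 multiply feeding an f32 add through
# G_FPEXT is folded into an f32 G_FMA of the extended operands.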
---
name: test_f16_f32_add_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...
---
name: test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...
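# Chained variants: when the add is fed by an fma/mul chain, the combine
# emits nested G_FMAs and folds %el1 into the innermost one.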
---
name: test_f32_add_fma_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...
---
name: test_f32_add_fma_mul_rhs
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...
---
name: test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %0, %1, %11
    %13:_(s32) = G_FADD %12, %el1
    $vgpr0 = COPY %13(s32)
...
---
name: test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $vgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %1, %3
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %15, %el1
    $vgpr0 = COPY %16(s32)
...
---
name: test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s32) = COPY $vgpr3
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %4, %5, %11
    %13:_(s32) = G_FADD %el1, %12
    $vgpr0 = COPY %13(s32)
...
---
name: test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s16) = G_TRUNC %4(s32)
    %6:_(s32) = COPY $vgpr3
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %5, %7
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %el1, %15
    $vgpr0 = COPY %16(s32)
...
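# G_FSUB variants: the subtracted operand is routed through G_FNEG so the
# difference is still expressible as a single G_FMA.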
---
name: test_f32_sub_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %6, %el1
    $vgpr0 = COPY %7(s32)
...
---
name: test_f32_sub_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{  $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %el1, %6
    $vgpr0 = COPY %7(s32)
...