1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s --mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s
; Declarations of the llvm.fma intrinsic overloads called by the tests in this
; file: the scalar half form and the <8 x half>, <16 x half> and <32 x half>
; vector forms. Each takes three operands of the same type as its result.
4 declare half @llvm.fma.f16(half, half, half)
5 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
6 declare <16 x half> @llvm.fma.v16f16(<16 x half>, <16 x half>, <16 x half>)
7 declare <32 x half> @llvm.fma.v32f16(<32 x half>, <32 x half>, <32 x half>)
9 define half @fma_123_f16(half %x, half %y, half %z) {
10 ; CHECK-LABEL: fma_123_f16:
12 ; CHECK-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0
14 %a = call half @llvm.fma.f16(half %x, half %y, half %z)
18 define half @fma_213_f16(half %x, half %y, half %z) {
19 ; CHECK-LABEL: fma_213_f16:
21 ; CHECK-NEXT: vfmadd213sh %xmm2, %xmm1, %xmm0
23 %a = call half @llvm.fma.f16(half %y, half %x, half %z)
27 define half @fma_231_f16(half %x, half %y, half %z) {
28 ; CHECK-LABEL: fma_231_f16:
30 ; CHECK-NEXT: vfmadd231sh %xmm1, %xmm2, %xmm0
32 %a = call half @llvm.fma.f16(half %y, half %z, half %x)
36 define half @fma_321_f16(half %x, half %y, half %z) {
37 ; CHECK-LABEL: fma_321_f16:
39 ; CHECK-NEXT: vfmadd231sh %xmm1, %xmm2, %xmm0
41 %a = call half @llvm.fma.f16(half %z, half %y, half %x)
45 define half @fma_132_f16(half %x, half %y, half %z) {
46 ; CHECK-LABEL: fma_132_f16:
48 ; CHECK-NEXT: vfmadd213sh %xmm1, %xmm2, %xmm0
50 %a = call half @llvm.fma.f16(half %x, half %z, half %y)
54 define half @fma_312_f16(half %x, half %y, half %z) {
55 ; CHECK-LABEL: fma_312_f16:
57 ; CHECK-NEXT: vfmadd213sh %xmm1, %xmm2, %xmm0
59 %a = call half @llvm.fma.f16(half %z, half %x, half %y)
63 define half @fma_load_123_f16(half %x, half %y, ptr %zp) {
64 ; CHECK-LABEL: fma_load_123_f16:
66 ; CHECK-NEXT: vfmadd213sh (%rdi), %xmm1, %xmm0
68 %z = load half, ptr %zp
69 %a = call half @llvm.fma.f16(half %x, half %y, half %z)
73 define half @fma_load_213_f16(half %x, half %y, ptr %zp) {
74 ; CHECK-LABEL: fma_load_213_f16:
76 ; CHECK-NEXT: vfmadd213sh (%rdi), %xmm1, %xmm0
78 %z = load half, ptr %zp
79 %a = call half @llvm.fma.f16(half %y, half %x, half %z)
83 define half @fma_load_231_f16(half %x, half %y, ptr %zp) {
84 ; CHECK-LABEL: fma_load_231_f16:
86 ; CHECK-NEXT: vfmadd231sh (%rdi), %xmm1, %xmm0
88 %z = load half, ptr %zp
89 %a = call half @llvm.fma.f16(half %y, half %z, half %x)
93 define half @fma_load_321_f16(half %x, half %y, ptr %zp) {
94 ; CHECK-LABEL: fma_load_321_f16:
96 ; CHECK-NEXT: vfmadd231sh (%rdi), %xmm1, %xmm0
98 %z = load half, ptr %zp
99 %a = call half @llvm.fma.f16(half %z, half %y, half %x)
103 define half @fma_load_132_f16(half %x, half %y, ptr %zp) {
104 ; CHECK-LABEL: fma_load_132_f16:
106 ; CHECK-NEXT: vfmadd132sh (%rdi), %xmm1, %xmm0
108 %z = load half, ptr %zp
109 %a = call half @llvm.fma.f16(half %x, half %z, half %y)
113 define half @fma_load_312_f16(half %x, half %y, ptr %zp) {
114 ; CHECK-LABEL: fma_load_312_f16:
116 ; CHECK-NEXT: vfmadd132sh (%rdi), %xmm1, %xmm0
118 %z = load half, ptr %zp
119 %a = call half @llvm.fma.f16(half %z, half %x, half %y)
123 define <8 x half> @fma_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
124 ; CHECK-LABEL: fma_123_v8f16:
126 ; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0
128 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
132 define <8 x half> @fma_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
133 ; CHECK-LABEL: fma_213_v8f16:
135 ; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0
137 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
141 define <8 x half> @fma_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
142 ; CHECK-LABEL: fma_231_v8f16:
144 ; CHECK-NEXT: vfmadd231ph %xmm1, %xmm2, %xmm0
146 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
150 define <8 x half> @fma_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
151 ; CHECK-LABEL: fma_321_v8f16:
153 ; CHECK-NEXT: vfmadd231ph %xmm1, %xmm2, %xmm0
155 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
159 define <8 x half> @fma_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
160 ; CHECK-LABEL: fma_132_v8f16:
162 ; CHECK-NEXT: vfmadd213ph %xmm1, %xmm2, %xmm0
164 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
168 define <8 x half> @fma_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z) {
169 ; CHECK-LABEL: fma_312_v8f16:
171 ; CHECK-NEXT: vfmadd213ph %xmm1, %xmm2, %xmm0
173 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
177 define <8 x half> @fma_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
178 ; CHECK-LABEL: fma_load_123_v8f16:
180 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0
182 %z = load <8 x half>, ptr %zp
183 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
187 define <8 x half> @fma_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
188 ; CHECK-LABEL: fma_load_213_v8f16:
190 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0
192 %z = load <8 x half>, ptr %zp
193 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
197 define <8 x half> @fma_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
198 ; CHECK-LABEL: fma_load_231_v8f16:
200 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0
202 %z = load <8 x half>, ptr %zp
203 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
207 define <8 x half> @fma_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
208 ; CHECK-LABEL: fma_load_321_v8f16:
210 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0
212 %z = load <8 x half>, ptr %zp
213 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
217 define <8 x half> @fma_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
218 ; CHECK-LABEL: fma_load_132_v8f16:
220 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0
222 %z = load <8 x half>, ptr %zp
223 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
227 define <8 x half> @fma_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp) {
228 ; CHECK-LABEL: fma_load_312_v8f16:
230 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0
232 %z = load <8 x half>, ptr %zp
233 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
237 define <8 x half> @fma_mask_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
238 ; CHECK-LABEL: fma_mask_123_v8f16:
240 ; CHECK-NEXT: kmovd %edi, %k1
241 ; CHECK-NEXT: vfmadd132ph %xmm1, %xmm2, %xmm0 {%k1}
243 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
244 %b = bitcast i8 %mask to <8 x i1>
245 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
249 define <8 x half> @fma_mask_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
250 ; CHECK-LABEL: fma_mask_213_v8f16:
252 ; CHECK-NEXT: kmovd %edi, %k1
253 ; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1}
255 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
256 %b = bitcast i8 %mask to <8 x i1>
257 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
261 define <8 x half> @fma_mask_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
262 ; CHECK-LABEL: fma_mask_231_v8f16:
264 ; CHECK-NEXT: kmovd %edi, %k1
265 ; CHECK-NEXT: vfmadd231ph %xmm2, %xmm1, %xmm0 {%k1}
267 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
268 %b = bitcast i8 %mask to <8 x i1>
269 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
273 define <8 x half> @fma_mask_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
274 ; CHECK-LABEL: fma_mask_321_v8f16:
276 ; CHECK-NEXT: kmovd %edi, %k1
277 ; CHECK-NEXT: vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1}
279 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
280 %b = bitcast i8 %mask to <8 x i1>
281 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
285 define <8 x half> @fma_mask_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
286 ; CHECK-LABEL: fma_mask_132_v8f16:
288 ; CHECK-NEXT: kmovd %edi, %k1
289 ; CHECK-NEXT: vfmadd132ph %xmm2, %xmm1, %xmm0 {%k1}
291 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
292 %b = bitcast i8 %mask to <8 x i1>
293 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
297 define <8 x half> @fma_mask_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
298 ; CHECK-LABEL: fma_mask_312_v8f16:
300 ; CHECK-NEXT: kmovd %edi, %k1
301 ; CHECK-NEXT: vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1}
303 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
304 %b = bitcast i8 %mask to <8 x i1>
305 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
309 define <8 x half> @fma_maskz_123_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
310 ; CHECK-LABEL: fma_maskz_123_v8f16:
312 ; CHECK-NEXT: kmovd %edi, %k1
313 ; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z}
315 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
316 %b = bitcast i8 %mask to <8 x i1>
317 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
321 define <8 x half> @fma_maskz_213_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
322 ; CHECK-LABEL: fma_maskz_213_v8f16:
324 ; CHECK-NEXT: kmovd %edi, %k1
325 ; CHECK-NEXT: vfmadd213ph %xmm2, %xmm1, %xmm0 {%k1} {z}
327 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
328 %b = bitcast i8 %mask to <8 x i1>
329 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
333 define <8 x half> @fma_maskz_231_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
334 ; CHECK-LABEL: fma_maskz_231_v8f16:
336 ; CHECK-NEXT: kmovd %edi, %k1
337 ; CHECK-NEXT: vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1} {z}
339 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
340 %b = bitcast i8 %mask to <8 x i1>
341 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
345 define <8 x half> @fma_maskz_321_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
346 ; CHECK-LABEL: fma_maskz_321_v8f16:
348 ; CHECK-NEXT: kmovd %edi, %k1
349 ; CHECK-NEXT: vfmadd231ph %xmm1, %xmm2, %xmm0 {%k1} {z}
351 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
352 %b = bitcast i8 %mask to <8 x i1>
353 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
357 define <8 x half> @fma_maskz_132_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
358 ; CHECK-LABEL: fma_maskz_132_v8f16:
360 ; CHECK-NEXT: kmovd %edi, %k1
361 ; CHECK-NEXT: vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1} {z}
363 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
364 %b = bitcast i8 %mask to <8 x i1>
365 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
369 define <8 x half> @fma_maskz_312_v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z, i8 %mask) {
370 ; CHECK-LABEL: fma_maskz_312_v8f16:
372 ; CHECK-NEXT: kmovd %edi, %k1
373 ; CHECK-NEXT: vfmadd213ph %xmm1, %xmm2, %xmm0 {%k1} {z}
375 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
376 %b = bitcast i8 %mask to <8 x i1>
377 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
381 define <8 x half> @fma_mask_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
382 ; CHECK-LABEL: fma_mask_load_123_v8f16:
384 ; CHECK-NEXT: kmovd %esi, %k1
385 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1}
387 %z = load <8 x half>, ptr %zp
388 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
389 %b = bitcast i8 %mask to <8 x i1>
390 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
394 define <8 x half> @fma_mask_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
395 ; CHECK-LABEL: fma_mask_load_213_v8f16:
397 ; CHECK-NEXT: kmovd %esi, %k1
398 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1}
400 %z = load <8 x half>, ptr %zp
401 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
402 %b = bitcast i8 %mask to <8 x i1>
403 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
407 define <8 x half> @fma_mask_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
408 ; CHECK-LABEL: fma_mask_load_231_v8f16:
410 ; CHECK-NEXT: kmovd %esi, %k1
411 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1}
413 %z = load <8 x half>, ptr %zp
414 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
415 %b = bitcast i8 %mask to <8 x i1>
416 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
420 define <8 x half> @fma_mask_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
421 ; CHECK-LABEL: fma_mask_load_321_v8f16:
423 ; CHECK-NEXT: kmovd %esi, %k1
424 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1}
426 %z = load <8 x half>, ptr %zp
427 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
428 %b = bitcast i8 %mask to <8 x i1>
429 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
433 define <8 x half> @fma_mask_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
434 ; CHECK-LABEL: fma_mask_load_132_v8f16:
436 ; CHECK-NEXT: kmovd %esi, %k1
437 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1}
439 %z = load <8 x half>, ptr %zp
440 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
441 %b = bitcast i8 %mask to <8 x i1>
442 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
446 define <8 x half> @fma_mask_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
447 ; CHECK-LABEL: fma_mask_load_312_v8f16:
449 ; CHECK-NEXT: kmovd %esi, %k1
450 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1}
452 %z = load <8 x half>, ptr %zp
453 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
454 %b = bitcast i8 %mask to <8 x i1>
455 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> %x
459 define <8 x half> @fma_maskz_load_123_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
460 ; CHECK-LABEL: fma_maskz_load_123_v8f16:
462 ; CHECK-NEXT: kmovd %esi, %k1
463 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1} {z}
465 %z = load <8 x half>, ptr %zp
466 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %y, <8 x half> %z)
467 %b = bitcast i8 %mask to <8 x i1>
468 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
472 define <8 x half> @fma_maskz_load_213_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
473 ; CHECK-LABEL: fma_maskz_load_213_v8f16:
475 ; CHECK-NEXT: kmovd %esi, %k1
476 ; CHECK-NEXT: vfmadd213ph (%rdi), %xmm1, %xmm0 {%k1} {z}
478 %z = load <8 x half>, ptr %zp
479 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %x, <8 x half> %z)
480 %b = bitcast i8 %mask to <8 x i1>
481 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
485 define <8 x half> @fma_maskz_load_231_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
486 ; CHECK-LABEL: fma_maskz_load_231_v8f16:
488 ; CHECK-NEXT: kmovd %esi, %k1
489 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1} {z}
491 %z = load <8 x half>, ptr %zp
492 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %y, <8 x half> %z, <8 x half> %x)
493 %b = bitcast i8 %mask to <8 x i1>
494 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
498 define <8 x half> @fma_maskz_load_321_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
499 ; CHECK-LABEL: fma_maskz_load_321_v8f16:
501 ; CHECK-NEXT: kmovd %esi, %k1
502 ; CHECK-NEXT: vfmadd231ph (%rdi), %xmm1, %xmm0 {%k1} {z}
504 %z = load <8 x half>, ptr %zp
505 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %y, <8 x half> %x)
506 %b = bitcast i8 %mask to <8 x i1>
507 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
511 define <8 x half> @fma_maskz_load_132_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
512 ; CHECK-LABEL: fma_maskz_load_132_v8f16:
514 ; CHECK-NEXT: kmovd %esi, %k1
515 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1} {z}
517 %z = load <8 x half>, ptr %zp
518 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %x, <8 x half> %z, <8 x half> %y)
519 %b = bitcast i8 %mask to <8 x i1>
520 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
524 define <8 x half> @fma_maskz_load_312_v8f16(<8 x half> %x, <8 x half> %y, ptr %zp, i8 %mask) {
525 ; CHECK-LABEL: fma_maskz_load_312_v8f16:
527 ; CHECK-NEXT: kmovd %esi, %k1
528 ; CHECK-NEXT: vfmadd132ph (%rdi), %xmm1, %xmm0 {%k1} {z}
530 %z = load <8 x half>, ptr %zp
531 %a = call <8 x half> @llvm.fma.v8f16(<8 x half> %z, <8 x half> %x, <8 x half> %y)
532 %b = bitcast i8 %mask to <8 x i1>
533 %c = select <8 x i1> %b, <8 x half> %a, <8 x half> zeroinitializer
537 define <16 x half> @fma_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
538 ; CHECK-LABEL: fma_123_v16f16:
540 ; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0
542 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
546 define <16 x half> @fma_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
547 ; CHECK-LABEL: fma_213_v16f16:
549 ; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0
551 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
555 define <16 x half> @fma_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
556 ; CHECK-LABEL: fma_231_v16f16:
558 ; CHECK-NEXT: vfmadd231ph %ymm1, %ymm2, %ymm0
560 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
564 define <16 x half> @fma_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
565 ; CHECK-LABEL: fma_321_v16f16:
567 ; CHECK-NEXT: vfmadd231ph %ymm1, %ymm2, %ymm0
569 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
573 define <16 x half> @fma_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
574 ; CHECK-LABEL: fma_132_v16f16:
576 ; CHECK-NEXT: vfmadd213ph %ymm1, %ymm2, %ymm0
578 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
582 define <16 x half> @fma_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z) {
583 ; CHECK-LABEL: fma_312_v16f16:
585 ; CHECK-NEXT: vfmadd213ph %ymm1, %ymm2, %ymm0
587 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
591 define <16 x half> @fma_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
592 ; CHECK-LABEL: fma_load_123_v16f16:
594 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0
596 %z = load <16 x half>, ptr %zp
597 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
601 define <16 x half> @fma_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
602 ; CHECK-LABEL: fma_load_213_v16f16:
604 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0
606 %z = load <16 x half>, ptr %zp
607 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
611 define <16 x half> @fma_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
612 ; CHECK-LABEL: fma_load_231_v16f16:
614 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0
616 %z = load <16 x half>, ptr %zp
617 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
621 define <16 x half> @fma_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
622 ; CHECK-LABEL: fma_load_321_v16f16:
624 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0
626 %z = load <16 x half>, ptr %zp
627 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
631 define <16 x half> @fma_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
632 ; CHECK-LABEL: fma_load_132_v16f16:
634 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0
636 %z = load <16 x half>, ptr %zp
637 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
641 define <16 x half> @fma_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp) {
642 ; CHECK-LABEL: fma_load_312_v16f16:
644 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0
646 %z = load <16 x half>, ptr %zp
647 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
651 define <16 x half> @fma_mask_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
652 ; CHECK-LABEL: fma_mask_123_v16f16:
654 ; CHECK-NEXT: kmovd %edi, %k1
655 ; CHECK-NEXT: vfmadd132ph %ymm1, %ymm2, %ymm0 {%k1}
657 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
658 %b = bitcast i16 %mask to <16 x i1>
659 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
663 define <16 x half> @fma_mask_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
664 ; CHECK-LABEL: fma_mask_213_v16f16:
666 ; CHECK-NEXT: kmovd %edi, %k1
667 ; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1}
669 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
670 %b = bitcast i16 %mask to <16 x i1>
671 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
675 define <16 x half> @fma_mask_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
676 ; CHECK-LABEL: fma_mask_231_v16f16:
678 ; CHECK-NEXT: kmovd %edi, %k1
679 ; CHECK-NEXT: vfmadd231ph %ymm2, %ymm1, %ymm0 {%k1}
681 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
682 %b = bitcast i16 %mask to <16 x i1>
683 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
687 define <16 x half> @fma_mask_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
688 ; CHECK-LABEL: fma_mask_321_v16f16:
690 ; CHECK-NEXT: kmovd %edi, %k1
691 ; CHECK-NEXT: vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1}
693 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
694 %b = bitcast i16 %mask to <16 x i1>
695 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
699 define <16 x half> @fma_mask_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
700 ; CHECK-LABEL: fma_mask_132_v16f16:
702 ; CHECK-NEXT: kmovd %edi, %k1
703 ; CHECK-NEXT: vfmadd132ph %ymm2, %ymm1, %ymm0 {%k1}
705 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
706 %b = bitcast i16 %mask to <16 x i1>
707 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
711 define <16 x half> @fma_mask_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
712 ; CHECK-LABEL: fma_mask_312_v16f16:
714 ; CHECK-NEXT: kmovd %edi, %k1
715 ; CHECK-NEXT: vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1}
717 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
718 %b = bitcast i16 %mask to <16 x i1>
719 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
723 define <16 x half> @fma_maskz_123_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
724 ; CHECK-LABEL: fma_maskz_123_v16f16:
726 ; CHECK-NEXT: kmovd %edi, %k1
727 ; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z}
729 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
730 %b = bitcast i16 %mask to <16 x i1>
731 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
735 define <16 x half> @fma_maskz_213_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
736 ; CHECK-LABEL: fma_maskz_213_v16f16:
738 ; CHECK-NEXT: kmovd %edi, %k1
739 ; CHECK-NEXT: vfmadd213ph %ymm2, %ymm1, %ymm0 {%k1} {z}
741 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
742 %b = bitcast i16 %mask to <16 x i1>
743 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
747 define <16 x half> @fma_maskz_231_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
748 ; CHECK-LABEL: fma_maskz_231_v16f16:
750 ; CHECK-NEXT: kmovd %edi, %k1
751 ; CHECK-NEXT: vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1} {z}
753 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
754 %b = bitcast i16 %mask to <16 x i1>
755 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
759 define <16 x half> @fma_maskz_321_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
760 ; CHECK-LABEL: fma_maskz_321_v16f16:
762 ; CHECK-NEXT: kmovd %edi, %k1
763 ; CHECK-NEXT: vfmadd231ph %ymm1, %ymm2, %ymm0 {%k1} {z}
765 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
766 %b = bitcast i16 %mask to <16 x i1>
767 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
771 define <16 x half> @fma_maskz_132_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
772 ; CHECK-LABEL: fma_maskz_132_v16f16:
774 ; CHECK-NEXT: kmovd %edi, %k1
775 ; CHECK-NEXT: vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1} {z}
777 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
778 %b = bitcast i16 %mask to <16 x i1>
779 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
783 define <16 x half> @fma_maskz_312_v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z, i16 %mask) {
784 ; CHECK-LABEL: fma_maskz_312_v16f16:
786 ; CHECK-NEXT: kmovd %edi, %k1
787 ; CHECK-NEXT: vfmadd213ph %ymm1, %ymm2, %ymm0 {%k1} {z}
789 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
790 %b = bitcast i16 %mask to <16 x i1>
791 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
795 define <16 x half> @fma_mask_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
796 ; CHECK-LABEL: fma_mask_load_123_v16f16:
798 ; CHECK-NEXT: kmovd %esi, %k1
799 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1}
801 %z = load <16 x half>, ptr %zp
802 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
803 %b = bitcast i16 %mask to <16 x i1>
804 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
808 define <16 x half> @fma_mask_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
809 ; CHECK-LABEL: fma_mask_load_213_v16f16:
811 ; CHECK-NEXT: kmovd %esi, %k1
812 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1}
814 %z = load <16 x half>, ptr %zp
815 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
816 %b = bitcast i16 %mask to <16 x i1>
817 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
821 define <16 x half> @fma_mask_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
822 ; CHECK-LABEL: fma_mask_load_231_v16f16:
824 ; CHECK-NEXT: kmovd %esi, %k1
825 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1}
827 %z = load <16 x half>, ptr %zp
828 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
829 %b = bitcast i16 %mask to <16 x i1>
830 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
834 define <16 x half> @fma_mask_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
835 ; CHECK-LABEL: fma_mask_load_321_v16f16:
837 ; CHECK-NEXT: kmovd %esi, %k1
838 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1}
840 %z = load <16 x half>, ptr %zp
841 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
842 %b = bitcast i16 %mask to <16 x i1>
843 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
847 define <16 x half> @fma_mask_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
848 ; CHECK-LABEL: fma_mask_load_132_v16f16:
850 ; CHECK-NEXT: kmovd %esi, %k1
851 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1}
853 %z = load <16 x half>, ptr %zp
854 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
855 %b = bitcast i16 %mask to <16 x i1>
856 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
860 define <16 x half> @fma_mask_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
861 ; CHECK-LABEL: fma_mask_load_312_v16f16:
863 ; CHECK-NEXT: kmovd %esi, %k1
864 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1}
866 %z = load <16 x half>, ptr %zp
867 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
868 %b = bitcast i16 %mask to <16 x i1>
869 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> %x
873 define <16 x half> @fma_maskz_load_123_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
874 ; CHECK-LABEL: fma_maskz_load_123_v16f16:
876 ; CHECK-NEXT: kmovd %esi, %k1
877 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1} {z}
879 %z = load <16 x half>, ptr %zp
880 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %y, <16 x half> %z)
881 %b = bitcast i16 %mask to <16 x i1>
882 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
886 define <16 x half> @fma_maskz_load_213_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
887 ; CHECK-LABEL: fma_maskz_load_213_v16f16:
889 ; CHECK-NEXT: kmovd %esi, %k1
890 ; CHECK-NEXT: vfmadd213ph (%rdi), %ymm1, %ymm0 {%k1} {z}
892 %z = load <16 x half>, ptr %zp
893 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %x, <16 x half> %z)
894 %b = bitcast i16 %mask to <16 x i1>
895 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
899 define <16 x half> @fma_maskz_load_231_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
900 ; CHECK-LABEL: fma_maskz_load_231_v16f16:
902 ; CHECK-NEXT: kmovd %esi, %k1
903 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1} {z}
905 %z = load <16 x half>, ptr %zp
906 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %y, <16 x half> %z, <16 x half> %x)
907 %b = bitcast i16 %mask to <16 x i1>
908 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
912 define <16 x half> @fma_maskz_load_321_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
913 ; CHECK-LABEL: fma_maskz_load_321_v16f16:
915 ; CHECK-NEXT: kmovd %esi, %k1
916 ; CHECK-NEXT: vfmadd231ph (%rdi), %ymm1, %ymm0 {%k1} {z}
918 %z = load <16 x half>, ptr %zp
919 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %y, <16 x half> %x)
920 %b = bitcast i16 %mask to <16 x i1>
921 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
925 define <16 x half> @fma_maskz_load_132_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
926 ; CHECK-LABEL: fma_maskz_load_132_v16f16:
928 ; CHECK-NEXT: kmovd %esi, %k1
929 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1} {z}
931 %z = load <16 x half>, ptr %zp
932 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %x, <16 x half> %z, <16 x half> %y)
933 %b = bitcast i16 %mask to <16 x i1>
934 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
938 define <16 x half> @fma_maskz_load_312_v16f16(<16 x half> %x, <16 x half> %y, ptr %zp, i16 %mask) {
939 ; CHECK-LABEL: fma_maskz_load_312_v16f16:
941 ; CHECK-NEXT: kmovd %esi, %k1
942 ; CHECK-NEXT: vfmadd132ph (%rdi), %ymm1, %ymm0 {%k1} {z}
944 %z = load <16 x half>, ptr %zp
945 %a = call <16 x half> @llvm.fma.v16f16(<16 x half> %z, <16 x half> %x, <16 x half> %y)
946 %b = bitcast i16 %mask to <16 x i1>
947 %c = select <16 x i1> %b, <16 x half> %a, <16 x half> zeroinitializer
951 define <32 x half> @fma_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
952 ; CHECK-LABEL: fma_123_v32f16:
954 ; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0
956 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
960 define <32 x half> @fma_213_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
961 ; CHECK-LABEL: fma_213_v32f16:
963 ; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0
965 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
969 define <32 x half> @fma_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
970 ; CHECK-LABEL: fma_231_v32f16:
972 ; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0
974 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
; Unmasked 512-bit FMA where the first argument %x is the addend: fma(%z, %y, %x).
; The assertions expect vfmadd231ph accumulating into %zmm0.
978 define <32 x half> @fma_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
979 ; CHECK-LABEL: fma_321_v32f16:
981 ; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0
983 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
; Unmasked 512-bit FMA where the second argument %y is the addend: fma(%x, %z, %y).
; The assertions expect vfmadd213ph with %zmm1 as the addend operand.
987 define <32 x half> @fma_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
988 ; CHECK-LABEL: fma_132_v32f16:
990 ; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0
992 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
; Unmasked 512-bit FMA where the second argument %y is the addend: fma(%z, %x, %y).
; The assertions expect vfmadd213ph with %zmm1 as the addend operand.
996 define <32 x half> @fma_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z) {
997 ; CHECK-LABEL: fma_312_v32f16:
999 ; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0
1001 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
; Unmasked 512-bit FMA with %z loaded from %zp and used as the addend:
; fma(%x, %y, %z). The assertions expect the load to appear as the (%rdi)
; memory operand of vfmadd213ph.
1005 define <32 x half> @fma_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1006 ; CHECK-LABEL: fma_load_123_v32f16:
1008 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0
1010 %z = load <32 x half>, ptr %zp
1011 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
; Unmasked 512-bit FMA with %z loaded from %zp and used as the addend:
; fma(%y, %x, %z). The assertions expect the load to appear as the (%rdi)
; memory operand of vfmadd213ph.
1015 define <32 x half> @fma_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1016 ; CHECK-LABEL: fma_load_213_v32f16:
1018 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0
1020 %z = load <32 x half>, ptr %zp
1021 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
; Unmasked 512-bit FMA with %z loaded from %zp and used as a multiplicand,
; %x as the addend: fma(%y, %z, %x). The assertions expect the load to appear
; as the (%rdi) memory operand of vfmadd231ph.
1025 define <32 x half> @fma_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1026 ; CHECK-LABEL: fma_load_231_v32f16:
1028 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0
1030 %z = load <32 x half>, ptr %zp
1031 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
; Unmasked 512-bit FMA with %z loaded from %zp and used as a multiplicand,
; %x as the addend: fma(%z, %y, %x). The assertions expect the load to appear
; as the (%rdi) memory operand of vfmadd231ph.
1035 define <32 x half> @fma_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1036 ; CHECK-LABEL: fma_load_321_v32f16:
1038 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0
1040 %z = load <32 x half>, ptr %zp
1041 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
; Unmasked 512-bit FMA with %z loaded from %zp and used as a multiplicand,
; %y as the addend: fma(%x, %z, %y). The assertions expect the load to appear
; as the (%rdi) memory operand of vfmadd132ph.
1045 define <32 x half> @fma_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1046 ; CHECK-LABEL: fma_load_132_v32f16:
1048 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0
1050 %z = load <32 x half>, ptr %zp
1051 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
; Unmasked 512-bit FMA with %z loaded from %zp and used as a multiplicand,
; %y as the addend: fma(%z, %x, %y). The assertions expect the load to appear
; as the (%rdi) memory operand of vfmadd132ph.
1055 define <32 x half> @fma_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp) {
1056 ; CHECK-LABEL: fma_load_312_v32f16:
1058 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0
1060 %z = load <32 x half>, ptr %zp
1061 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
; 512-bit fma(%x, %y, %z) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd132ph writing %zmm0 under {%k1}.
1065 define <32 x half> @fma_mask_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1066 ; CHECK-LABEL: fma_mask_123_v32f16:
1068 ; CHECK-NEXT: kmovd %edi, %k1
1069 ; CHECK-NEXT: vfmadd132ph %zmm1, %zmm2, %zmm0 {%k1}
1071 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
1072 %b = bitcast i32 %mask to <32 x i1>
1073 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%y, %x, %z) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd213ph writing %zmm0 under {%k1}.
1077 define <32 x half> @fma_mask_213_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1078 ; CHECK-LABEL: fma_mask_213_v32f16:
1080 ; CHECK-NEXT: kmovd %edi, %k1
1081 ; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1}
1083 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
1084 %b = bitcast i32 %mask to <32 x i1>
1085 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%y, %z, %x) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd231ph writing %zmm0 under {%k1}.
1089 define <32 x half> @fma_mask_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1090 ; CHECK-LABEL: fma_mask_231_v32f16:
1092 ; CHECK-NEXT: kmovd %edi, %k1
1093 ; CHECK-NEXT: vfmadd231ph %zmm2, %zmm1, %zmm0 {%k1}
1095 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
1096 %b = bitcast i32 %mask to <32 x i1>
1097 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%z, %y, %x) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd231ph writing %zmm0 under {%k1}.
1101 define <32 x half> @fma_mask_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1102 ; CHECK-LABEL: fma_mask_321_v32f16:
1104 ; CHECK-NEXT: kmovd %edi, %k1
1105 ; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1}
1107 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
1108 %b = bitcast i32 %mask to <32 x i1>
1109 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%x, %z, %y) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd132ph writing %zmm0 under {%k1}.
1113 define <32 x half> @fma_mask_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1114 ; CHECK-LABEL: fma_mask_132_v32f16:
1116 ; CHECK-NEXT: kmovd %edi, %k1
1117 ; CHECK-NEXT: vfmadd132ph %zmm2, %zmm1, %zmm0 {%k1}
1119 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
1120 %b = bitcast i32 %mask to <32 x i1>
1121 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%z, %x, %y) blended with %x: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 keep the value of %x.
; The assertions expect kmovd into %k1 and vfmadd213ph writing %zmm0 under {%k1}.
1125 define <32 x half> @fma_mask_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1126 ; CHECK-LABEL: fma_mask_312_v32f16:
1128 ; CHECK-NEXT: kmovd %edi, %k1
1129 ; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1}
1131 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
1132 %b = bitcast i32 %mask to <32 x i1>
1133 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%x, %y, %z) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd213ph with {%k1} {z}.
1137 define <32 x half> @fma_maskz_123_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1138 ; CHECK-LABEL: fma_maskz_123_v32f16:
1140 ; CHECK-NEXT: kmovd %edi, %k1
1141 ; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z}
1143 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
1144 %b = bitcast i32 %mask to <32 x i1>
1145 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%y, %x, %z) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd213ph with {%k1} {z}.
1149 define <32 x half> @fma_maskz_213_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1150 ; CHECK-LABEL: fma_maskz_213_v32f16:
1152 ; CHECK-NEXT: kmovd %edi, %k1
1153 ; CHECK-NEXT: vfmadd213ph %zmm2, %zmm1, %zmm0 {%k1} {z}
1155 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
1156 %b = bitcast i32 %mask to <32 x i1>
1157 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%y, %z, %x) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd231ph with {%k1} {z}.
1161 define <32 x half> @fma_maskz_231_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1162 ; CHECK-LABEL: fma_maskz_231_v32f16:
1164 ; CHECK-NEXT: kmovd %edi, %k1
1165 ; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1} {z}
1167 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
1168 %b = bitcast i32 %mask to <32 x i1>
1169 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%z, %y, %x) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd231ph with {%k1} {z}.
1173 define <32 x half> @fma_maskz_321_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1174 ; CHECK-LABEL: fma_maskz_321_v32f16:
1176 ; CHECK-NEXT: kmovd %edi, %k1
1177 ; CHECK-NEXT: vfmadd231ph %zmm1, %zmm2, %zmm0 {%k1} {z}
1179 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
1180 %b = bitcast i32 %mask to <32 x i1>
1181 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%x, %z, %y) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd213ph with {%k1} {z}.
1185 define <32 x half> @fma_maskz_132_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1186 ; CHECK-LABEL: fma_maskz_132_v32f16:
1188 ; CHECK-NEXT: kmovd %edi, %k1
1189 ; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1} {z}
1191 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
1192 %b = bitcast i32 %mask to <32 x i1>
1193 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%z, %x, %y) blended with zero: the i32 %mask is bitcast to
; <32 x i1>, and lanes whose mask bit is 0 select zeroinitializer.
; The assertions expect kmovd into %k1 and vfmadd213ph with {%k1} {z}.
1197 define <32 x half> @fma_maskz_312_v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z, i32 %mask) {
1198 ; CHECK-LABEL: fma_maskz_312_v32f16:
1200 ; CHECK-NEXT: kmovd %edi, %k1
1201 ; CHECK-NEXT: vfmadd213ph %zmm1, %zmm2, %zmm0 {%k1} {z}
1203 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
1204 %b = bitcast i32 %mask to <32 x i1>
1205 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%x, %y, %z) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd213ph with a (%rdi)
; memory operand under {%k1}.
1209 define <32 x half> @fma_mask_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1210 ; CHECK-LABEL: fma_mask_load_123_v32f16:
1212 ; CHECK-NEXT: kmovd %esi, %k1
1213 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1}
1215 %z = load <32 x half>, ptr %zp
1216 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
1217 %b = bitcast i32 %mask to <32 x i1>
1218 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%y, %x, %z) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd213ph with a (%rdi)
; memory operand under {%k1}.
1222 define <32 x half> @fma_mask_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1223 ; CHECK-LABEL: fma_mask_load_213_v32f16:
1225 ; CHECK-NEXT: kmovd %esi, %k1
1226 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1}
1228 %z = load <32 x half>, ptr %zp
1229 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
1230 %b = bitcast i32 %mask to <32 x i1>
1231 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%y, %z, %x) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd231ph with a (%rdi)
; memory operand under {%k1}.
1235 define <32 x half> @fma_mask_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1236 ; CHECK-LABEL: fma_mask_load_231_v32f16:
1238 ; CHECK-NEXT: kmovd %esi, %k1
1239 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1}
1241 %z = load <32 x half>, ptr %zp
1242 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
1243 %b = bitcast i32 %mask to <32 x i1>
1244 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%z, %y, %x) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd231ph with a (%rdi)
; memory operand under {%k1}.
1248 define <32 x half> @fma_mask_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1249 ; CHECK-LABEL: fma_mask_load_321_v32f16:
1251 ; CHECK-NEXT: kmovd %esi, %k1
1252 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1}
1254 %z = load <32 x half>, ptr %zp
1255 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
1256 %b = bitcast i32 %mask to <32 x i1>
1257 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%x, %z, %y) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd132ph with a (%rdi)
; memory operand under {%k1}.
1261 define <32 x half> @fma_mask_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1262 ; CHECK-LABEL: fma_mask_load_132_v32f16:
1264 ; CHECK-NEXT: kmovd %esi, %k1
1265 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1}
1267 %z = load <32 x half>, ptr %zp
1268 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
1269 %b = bitcast i32 %mask to <32 x i1>
1270 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%z, %x, %y) with %z loaded from %zp, blended with %x: lanes whose
; bit in the i32 %mask is 0 keep the value of %x.
; The assertions expect kmovd from %esi into %k1 and vfmadd132ph with a (%rdi)
; memory operand under {%k1}.
1274 define <32 x half> @fma_mask_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1275 ; CHECK-LABEL: fma_mask_load_312_v32f16:
1277 ; CHECK-NEXT: kmovd %esi, %k1
1278 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1}
1280 %z = load <32 x half>, ptr %zp
1281 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
1282 %b = bitcast i32 %mask to <32 x i1>
1283 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> %x
; 512-bit fma(%x, %y, %z) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd213ph with a (%rdi)
; memory operand and {%k1} {z}.
1287 define <32 x half> @fma_maskz_load_123_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1288 ; CHECK-LABEL: fma_maskz_load_123_v32f16:
1290 ; CHECK-NEXT: kmovd %esi, %k1
1291 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1293 %z = load <32 x half>, ptr %zp
1294 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %y, <32 x half> %z)
1295 %b = bitcast i32 %mask to <32 x i1>
1296 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%y, %x, %z) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd213ph with a (%rdi)
; memory operand and {%k1} {z}.
1300 define <32 x half> @fma_maskz_load_213_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1301 ; CHECK-LABEL: fma_maskz_load_213_v32f16:
1303 ; CHECK-NEXT: kmovd %esi, %k1
1304 ; CHECK-NEXT: vfmadd213ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1306 %z = load <32 x half>, ptr %zp
1307 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %x, <32 x half> %z)
1308 %b = bitcast i32 %mask to <32 x i1>
1309 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%y, %z, %x) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd231ph with a (%rdi)
; memory operand and {%k1} {z}.
1313 define <32 x half> @fma_maskz_load_231_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1314 ; CHECK-LABEL: fma_maskz_load_231_v32f16:
1316 ; CHECK-NEXT: kmovd %esi, %k1
1317 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1319 %z = load <32 x half>, ptr %zp
1320 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %y, <32 x half> %z, <32 x half> %x)
1321 %b = bitcast i32 %mask to <32 x i1>
1322 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%z, %y, %x) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd231ph with a (%rdi)
; memory operand and {%k1} {z}.
1326 define <32 x half> @fma_maskz_load_321_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1327 ; CHECK-LABEL: fma_maskz_load_321_v32f16:
1329 ; CHECK-NEXT: kmovd %esi, %k1
1330 ; CHECK-NEXT: vfmadd231ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1332 %z = load <32 x half>, ptr %zp
1333 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %y, <32 x half> %x)
1334 %b = bitcast i32 %mask to <32 x i1>
1335 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%x, %z, %y) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd132ph with a (%rdi)
; memory operand and {%k1} {z}.
1339 define <32 x half> @fma_maskz_load_132_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1340 ; CHECK-LABEL: fma_maskz_load_132_v32f16:
1342 ; CHECK-NEXT: kmovd %esi, %k1
1343 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1345 %z = load <32 x half>, ptr %zp
1346 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %x, <32 x half> %z, <32 x half> %y)
1347 %b = bitcast i32 %mask to <32 x i1>
1348 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer
; 512-bit fma(%z, %x, %y) with %z loaded from %zp, blended with zero: lanes
; whose bit in the i32 %mask is 0 select zeroinitializer.
; The assertions expect kmovd from %esi into %k1 and vfmadd132ph with a (%rdi)
; memory operand and {%k1} {z}.
1352 define <32 x half> @fma_maskz_load_312_v32f16(<32 x half> %x, <32 x half> %y, ptr %zp, i32 %mask) {
1353 ; CHECK-LABEL: fma_maskz_load_312_v32f16:
1355 ; CHECK-NEXT: kmovd %esi, %k1
1356 ; CHECK-NEXT: vfmadd132ph (%rdi), %zmm1, %zmm0 {%k1} {z}
1358 %z = load <32 x half>, ptr %zp
1359 %a = call <32 x half> @llvm.fma.v32f16(<32 x half> %z, <32 x half> %x, <32 x half> %y)
1360 %b = bitcast i32 %mask to <32 x i1>
1361 %c = select <32 x i1> %b, <32 x half> %a, <32 x half> zeroinitializer