1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
5 declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32)
6 declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i32)
; Unmasked <16 x float> negated multiply-add: %a1 is negated (fsub from -0.0)
; and passed with %a0 and %a2 to llvm.fma.v16f32. Both RUN lines share the
; CHECK prefix and expect one vfnmadd213ps on zmm registers.
8 define <16 x float> @test_x86_vfnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
9 ; CHECK-LABEL: test_x86_vfnmadd_ps_z:
11 ; CHECK-NEXT: vfnmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
12 ; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
13 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
14 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
15 %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
; Merge-masked <16 x float> negated multiply-add: fma(%a0, -%a1, %a2) is
; selected against %a0 under the i16 %mask bitcast to <16 x i1>. The expected
; code loads the mask into %k1 (from the stack on X86, from %edi on X64) and
; emits vfnmadd132ps with {%k1}.
19 define <16 x float> @test_mask_vfnmadd_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
20 ; X86-LABEL: test_mask_vfnmadd_ps:
22 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
23 ; X86-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
24 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
25 ; X86-NEXT: retl # encoding: [0xc3]
27 ; X64-LABEL: test_mask_vfnmadd_ps:
29 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
30 ; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
31 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
32 ; X64-NEXT: retq # encoding: [0xc3]
33 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
34 %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %a2)
35 %3 = bitcast i16 %mask to <16 x i1>
36 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %a0
; Unmasked <8 x double> negated multiply-add: %a1 is negated (fsub from -0.0)
; and passed with %a0 and %a2 to llvm.fma.v8f64. Both RUN lines expect one
; vfnmadd213pd on zmm registers.
40 define <8 x double> @test_x86_vfnmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
41 ; CHECK-LABEL: test_x86_vfnmadd_pd_z:
43 ; CHECK-NEXT: vfnmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
44 ; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) + zmm2
45 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
46 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
47 %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
; Merge-masked <8 x double> negated multiply-add: fma(%a0, -%a1, %a2) is
; selected against %a0 under the i8 %mask bitcast to <8 x i1>. On X86 the
; expected code zero-extends the stack byte (movzbl) before kmovw; on X64 the
; mask is moved directly from %edi. Both expect vfnmadd132pd with {%k1}.
51 define <8 x double> @test_mask_vfnmadd_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
52 ; X86-LABEL: test_mask_vfnmadd_pd:
54 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
55 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
56 ; X86-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
57 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
58 ; X86-NEXT: retl # encoding: [0xc3]
60 ; X64-LABEL: test_mask_vfnmadd_pd:
62 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
63 ; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
64 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
65 ; X64-NEXT: retq # encoding: [0xc3]
66 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
67 %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %a2)
68 %3 = bitcast i8 %mask to <8 x i1>
69 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %a0
; Unmasked <16 x float> negated multiply-subtract: both %a1 and %a2 are
; negated (fsub from -0.0) before the llvm.fma.v16f32 call. Both RUN lines
; expect one vfnmsub213ps on zmm registers.
73 define <16 x float> @test_x86_vfnmsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
74 ; CHECK-LABEL: test_x86_vfnmsubps_z:
76 ; CHECK-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
77 ; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
78 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
79 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
80 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
81 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
; Merge-masked <16 x float> negated multiply-subtract: fma(%a0, -%a1, -%a2)
; is selected against %a0 under the i16 %mask bitcast to <16 x i1>. Both
; triples expect the mask in %k1 followed by vfnmsub132ps with {%k1}.
85 define <16 x float> @test_mask_vfnmsub_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
86 ; X86-LABEL: test_mask_vfnmsub_ps:
88 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
89 ; X86-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
90 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
91 ; X86-NEXT: retl # encoding: [0xc3]
93 ; X64-LABEL: test_mask_vfnmsub_ps:
95 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
96 ; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
97 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
98 ; X64-NEXT: retq # encoding: [0xc3]
99 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a1
100 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a2
101 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %a0, <16 x float> %1, <16 x float> %2)
102 %4 = bitcast i16 %mask to <16 x i1>
103 %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %a0
; Unmasked <8 x double> negated multiply-subtract: both %a1 and %a2 are
; negated (fsub from -0.0) before the llvm.fma.v8f64 call. Both RUN lines
; expect one vfnmsub213pd on zmm registers.
107 define <8 x double> @test_x86_vfnmsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
108 ; CHECK-LABEL: test_x86_vfnmsubpd_z:
110 ; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
111 ; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
112 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
113 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
114 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
115 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
; Merge-masked <8 x double> negated multiply-subtract: fma(%a0, -%a1, -%a2)
; is selected against %a0 under the i8 %mask bitcast to <8 x i1>. X86 expects
; a movzbl of the stack byte before kmovw; X64 moves %edi directly. Both
; expect vfnmsub132pd with {%k1}.
119 define <8 x double> @test_mask_vfnmsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
120 ; X86-LABEL: test_mask_vfnmsub_pd:
122 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
123 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
124 ; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
125 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
126 ; X86-NEXT: retl # encoding: [0xc3]
128 ; X64-LABEL: test_mask_vfnmsub_pd:
130 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
131 ; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
132 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
133 ; X64-NEXT: retq # encoding: [0xc3]
134 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
135 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
136 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
137 %4 = bitcast i8 %mask to <8 x i1>
138 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Unmasked call to the llvm.x86.avx512.vfmaddsub.ps.512 intrinsic with i32 4
; as its last argument; the result is returned directly. Both RUN lines
; expect one vfmaddsub213ps with no embedded-rounding operand.
142 define <16 x float> @test_x86_vfmaddsubps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
143 ; CHECK-LABEL: test_x86_vfmaddsubps_z:
145 ; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
146 ; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
147 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
148 %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) #2
149 ret <16 x float> %res
; Merge-masked llvm.x86.avx512.vfmaddsub.ps.512 (last argument i32 4): the
; intrinsic result is selected against %a under the i16 %mask bitcast to
; <16 x i1>, and the select is returned. Both triples expect the mask in %k1
; followed by vfmaddsub132ps with {%k1}.
152 define <16 x float> @test_mask_fmaddsub_ps(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
153 ; X86-LABEL: test_mask_fmaddsub_ps:
155 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
156 ; X86-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
157 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
158 ; X86-NEXT: retl # encoding: [0xc3]
160 ; X64-LABEL: test_mask_fmaddsub_ps:
162 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
163 ; X64-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
164 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
165 ; X64-NEXT: retq # encoding: [0xc3]
166 %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i32 4)
167 %bc = bitcast i16 %mask to <16 x i1>
168 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a
169 ret <16 x float> %sel
172 declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) nounwind readnone
; Unmasked call to the llvm.x86.avx512.vfmaddsub.pd.512 intrinsic with i32 4
; as its last argument; the result is returned directly. Both RUN lines
; expect one vfmaddsub213pd with no embedded-rounding operand.
174 define <8 x double> @test_x86_vfmaddsubpd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
175 ; CHECK-LABEL: test_x86_vfmaddsubpd_z:
177 ; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
178 ; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) +/- zmm2
179 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
180 %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
181 ret <8 x double> %res
183 declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) nounwind readnone
; Merge-masked llvm.x86.avx512.vfmaddsub.pd.512 (last argument i32 4): the
; intrinsic result is selected against %a0 under the i8 %mask bitcast to
; <8 x i1>, and the select is returned. X86 expects movzbl + kmovw for the
; mask, X64 a direct kmovw from %edi; both expect vfmaddsub132pd with {%k1}.
185 define <8 x double> @test_mask_vfmaddsub_pd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
186 ; X86-LABEL: test_mask_vfmaddsub_pd:
188 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
189 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
190 ; X86-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
191 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
192 ; X86-NEXT: retl # encoding: [0xc3]
194 ; X64-LABEL: test_mask_vfmaddsub_pd:
196 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
197 ; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
198 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
199 ; X64-NEXT: retq # encoding: [0xc3]
200 %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) #2
201 %bc = bitcast i8 %mask to <8 x i1>
202 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
203 ret <8 x double> %sel
; "mask" form of vfmaddsub.pd.512: the intrinsic result (last argument i32 4)
; is selected against the first operand %x0 under the i8 mask %x3, and the
; select is returned. Expected lowering is vfmaddsub132pd with {%k1}, merging
; into zmm0.
206 define <8 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
207 ; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
209 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
210 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
211 ; X86-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
212 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
213 ; X86-NEXT: retl # encoding: [0xc3]
215 ; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_512:
217 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
218 ; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
219 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
220 ; X64-NEXT: retq # encoding: [0xc3]
221 %res = call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
222 %bc = bitcast i8 %x3 to <8 x i1>
223 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
224 ret <8 x double> %sel
; "mask3" form of fmaddsub on <8 x double>, written with generic IR: %1 is
; fma(%x0, %x1, %x2) and %3 is fma(%x0, %x1, -%x2). The shufflevector takes
; even lanes from %3 (subtract) and odd lanes from %1 (add). The select keeps
; the third operand %x2 where the i8 mask %x3 is clear. Expected lowering is
; vfmaddsub231pd merging into zmm2, then a vmovapd copy into zmm0.
227 define <8 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
228 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
230 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
231 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
232 ; X86-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
233 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
234 ; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
235 ; X86-NEXT: retl # encoding: [0xc3]
237 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_512:
239 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
240 ; X64-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
241 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
242 ; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
243 ; X64-NEXT: retq # encoding: [0xc3]
244 %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
245 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
246 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
247 %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
248 %5 = bitcast i8 %x3 to <8 x i1>
249 %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
; "maskz" form of fmaddsub on <8 x double>, written with generic IR: even
; lanes come from fma(%x0, %x1, -%x2) and odd lanes from fma(%x0, %x1, %x2)
; via the shufflevector. The select uses zeroinitializer where the i8 mask
; %x3 is clear. Expected lowering is vfmaddsub213pd with {%k1} {z}.
253 define <8 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
254 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
256 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
257 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
258 ; X86-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
259 ; X86-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
260 ; X86-NEXT: retl # encoding: [0xc3]
262 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_512:
264 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
265 ; X64-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
266 ; X64-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
267 ; X64-NEXT: retq # encoding: [0xc3]
268 %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
269 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
270 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
271 %4 = shufflevector <8 x double> %3, <8 x double> %1, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
272 %5 = bitcast i8 %x3 to <8 x i1>
273 %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> zeroinitializer
; "mask" form of vfmaddsub.ps.512: the intrinsic result (last argument i32 4)
; is selected against the first operand %x0 under the i16 mask %x3, and the
; select is returned. Expected lowering is vfmaddsub132ps with {%k1}, merging
; into zmm0.
277 define <16 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
278 ; X86-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
280 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
281 ; X86-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
282 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
283 ; X86-NEXT: retl # encoding: [0xc3]
285 ; X64-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_512:
287 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
288 ; X64-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
289 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) +/- zmm2
290 ; X64-NEXT: retq # encoding: [0xc3]
291 %res = call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
292 %bc = bitcast i16 %x3 to <16 x i1>
293 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
294 ret <16 x float> %sel
; "mask3" form of fmaddsub on <16 x float>, written with generic IR: %1 is
; fma(%x0, %x1, %x2) and %3 is fma(%x0, %x1, -%x2). The shufflevector takes
; even lanes from %3 (subtract) and odd lanes from %1 (add). The select keeps
; the third operand %x2 where the i16 mask %x3 is clear. Expected lowering is
; vfmaddsub231ps merging into zmm2, then a vmovaps copy into zmm0.
297 define <16 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
298 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
300 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
301 ; X86-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
302 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
303 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
304 ; X86-NEXT: retl # encoding: [0xc3]
306 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_512:
308 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
309 ; X64-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
310 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) +/- zmm2
311 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
312 ; X64-NEXT: retq # encoding: [0xc3]
313 %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
314 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
315 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
316 %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
317 %5 = bitcast i16 %x3 to <16 x i1>
318 %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
; "maskz" form of fmaddsub on <16 x float>, written with generic IR: even
; lanes come from fma(%x0, %x1, -%x2) and odd lanes from fma(%x0, %x1, %x2)
; via the shufflevector. The select uses zeroinitializer where the i16 mask
; %x3 is clear. Expected lowering is vfmaddsub213ps with {%k1} {z}.
322 define <16 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
323 ; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
325 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
326 ; X86-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
327 ; X86-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
328 ; X86-NEXT: retl # encoding: [0xc3]
330 ; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_512:
332 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
333 ; X64-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
334 ; X64-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) +/- zmm2
335 ; X64-NEXT: retq # encoding: [0xc3]
336 %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
337 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
338 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
339 %4 = shufflevector <16 x float> %3, <16 x float> %1, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
340 %5 = bitcast i16 %x3 to <16 x i1>
341 %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> zeroinitializer
; "mask3" form of fmsubadd on <8 x double>, written with generic IR. The
; shufflevector operand order is swapped relative to the fmaddsub tests: even
; lanes come from %1 = fma(%x0, %x1, %x2) (add) and odd lanes from
; %3 = fma(%x0, %x1, -%x2) (subtract). The select keeps %x2 where the i8 mask
; %x3 is clear. Expected lowering is vfmsubadd231pd merging into zmm2, then a
; vmovapd copy into zmm0.
345 define <8 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
346 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
348 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
349 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
350 ; X86-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
351 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
352 ; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
353 ; X86-NEXT: retl # encoding: [0xc3]
355 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_512:
357 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
358 ; X64-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
359 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
360 ; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
361 ; X64-NEXT: retq # encoding: [0xc3]
362 %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
363 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
364 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %2)
365 %4 = shufflevector <8 x double> %1, <8 x double> %3, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
366 %5 = bitcast i8 %x3 to <8 x i1>
367 %6 = select <8 x i1> %5, <8 x double> %4, <8 x double> %x2
; "mask3" form of fmsubadd on <16 x float>, written with generic IR: even
; lanes come from %1 = fma(%x0, %x1, %x2) (add) and odd lanes from
; %3 = fma(%x0, %x1, -%x2) (subtract). The select keeps %x2 where the i16
; mask %x3 is clear. Expected lowering is vfmsubadd231ps merging into zmm2,
; then a vmovaps copy into zmm0.
371 define <16 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
372 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
374 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
375 ; X86-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
376 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
377 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
378 ; X86-NEXT: retl # encoding: [0xc3]
380 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_512:
382 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
383 ; X64-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
384 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) -/+ zmm2
385 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
386 ; X64-NEXT: retq # encoding: [0xc3]
387 %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
388 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
389 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %2)
390 %4 = shufflevector <16 x float> %1, <16 x float> %3, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
391 %5 = bitcast i16 %x3 to <16 x i1>
392 %6 = select <16 x i1> %5, <16 x float> %4, <16 x float> %x2
; Merge-masked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 8:
; the result is selected against %a0 under the i16 %mask and returned. The
; expected instruction is vfmadd132ps with the {rn-sae} operand and {%k1}.
396 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
397 ; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
399 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
400 ; X86-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
401 ; X86-NEXT: retl # encoding: [0xc3]
403 ; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rne:
405 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
406 ; X64-NEXT: vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
407 ; X64-NEXT: retq # encoding: [0xc3]
408 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
409 %bc = bitcast i16 %mask to <16 x i1>
410 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
411 ret <16 x float> %sel
; Merge-masked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 9:
; the result is selected against %a0 under the i16 %mask and returned. The
; expected instruction is vfmadd132ps with the {rd-sae} operand and {%k1}.
414 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
415 ; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
417 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
418 ; X86-NEXT: vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
419 ; X86-NEXT: retl # encoding: [0xc3]
421 ; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtn:
423 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
424 ; X64-NEXT: vfmadd132ps {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x39,0x98,0xc1]
425 ; X64-NEXT: retq # encoding: [0xc3]
426 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
427 %bc = bitcast i16 %mask to <16 x i1>
428 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
429 ret <16 x float> %sel
; Merge-masked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 10:
; the result is selected against %a0 under the i16 %mask and returned. The
; expected instruction is vfmadd132ps with the {ru-sae} operand and {%k1}.
432 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
433 ; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
435 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
436 ; X86-NEXT: vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
437 ; X86-NEXT: retl # encoding: [0xc3]
439 ; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtp:
441 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
442 ; X64-NEXT: vfmadd132ps {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x59,0x98,0xc1]
443 ; X64-NEXT: retq # encoding: [0xc3]
444 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
445 %bc = bitcast i16 %mask to <16 x i1>
446 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
447 ret <16 x float> %sel
; Merge-masked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 11:
; the result is selected against %a0 under the i16 %mask and returned. The
; expected instruction is vfmadd132ps with the {rz-sae} operand and {%k1}.
450 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
451 ; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
453 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
454 ; X86-NEXT: vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
455 ; X86-NEXT: retl # encoding: [0xc3]
457 ; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_rtz:
459 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
460 ; X64-NEXT: vfmadd132ps {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x79,0x98,0xc1]
461 ; X64-NEXT: retq # encoding: [0xc3]
462 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
463 %bc = bitcast i16 %mask to <16 x i1>
464 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
465 ret <16 x float> %sel
; Merge-masked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 4:
; the result is selected against %a0 under the i16 %mask and returned. Unlike
; the i32 8..11 variants, the expected vfmadd132ps carries no embedded-rounding
; operand, only the {%k1} merge mask.
468 define <16 x float> @test_mask_round_vfmadd512_ps_rrb_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i16 %mask) {
469 ; X86-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
471 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
472 ; X86-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
473 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
474 ; X86-NEXT: retl # encoding: [0xc3]
476 ; X64-LABEL: test_mask_round_vfmadd512_ps_rrb_current:
478 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
479 ; X64-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
480 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
481 ; X64-NEXT: retq # encoding: [0xc3]
482 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
483 %bc = bitcast i16 %mask to <16 x i1>
484 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %a0
485 ret <16 x float> %sel
; Unmasked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 8; the
; result is returned directly. Both RUN lines expect vfmadd213ps with the
; {rn-sae} operand.
488 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rne(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
489 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rne:
491 ; CHECK-NEXT: vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
492 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
493 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 8) nounwind
494 ret <16 x float> %res
; Unmasked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 9; the
; result is returned directly. Both RUN lines expect vfmadd213ps with the
; {rd-sae} operand.
497 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtn(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
498 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtn:
500 ; CHECK-NEXT: vfmadd213ps {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x38,0xa8,0xc2]
501 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
502 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 9) nounwind
503 ret <16 x float> %res
; Unmasked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 10; the
; result is returned directly. Both RUN lines expect vfmadd213ps with the
; {ru-sae} operand.
506 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtp(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
507 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtp:
509 ; CHECK-NEXT: vfmadd213ps {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x58,0xa8,0xc2]
510 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
511 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 10) nounwind
512 ret <16 x float> %res
; Unmasked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 11; the
; result is returned directly. Both RUN lines expect vfmadd213ps with the
; {rz-sae} operand.
515 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_rtz(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
516 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_rtz:
518 ; CHECK-NEXT: vfmadd213ps {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x78,0xa8,0xc2]
519 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
520 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 11) nounwind
521 ret <16 x float> %res
; Unmasked llvm.x86.avx512.vfmadd.ps.512 with rounding argument i32 4; the
; result is returned directly. Both RUN lines expect a plain vfmadd213ps with
; no embedded-rounding operand.
524 define <16 x float> @test_mask_round_vfmadd512_ps_rrbz_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
525 ; CHECK-LABEL: test_mask_round_vfmadd512_ps_rrbz_current:
527 ; CHECK-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
528 ; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
529 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
530 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2, i32 4) nounwind
531 ret <16 x float> %res
; Merge-masked fused multiply-subtract on 8 x double with the third operand as
; passthrough. %x2 is negated (fsub from a -0.0 splat) and used as the addend
; of @llvm.fma.v8f64; the select keeps %x2 in lanes whose bit in %x3 is clear.
; Expected lowering: one vfmsub231pd under {%k1} writing zmm2, then a vmovapd
; of zmm2 into zmm0. The i686 run loads the i8 mask from the stack with movzbl;
; the x86_64 run takes it from %edi.
534 define <8 x double>@test_int_x86_avx512_mask3_vfmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
535 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
537 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
538 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
539 ; X86-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
540 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
541 ; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
542 ; X86-NEXT: retl # encoding: [0xc3]
544 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_512:
546 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
547 ; X64-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
548 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
549 ; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
550 ; X64-NEXT: retq # encoding: [0xc3]
551 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
552 %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %1)
553 %3 = bitcast i8 %x3 to <8 x i1>
554 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x2
; Merge-masked fused multiply-subtract on 16 x float with the third operand as
; passthrough. %x2 is negated (fsub from a -0.0 splat) and used as the addend
; of @llvm.fma.v16f32; the select keeps %x2 in lanes whose bit in %x3 is clear.
; Expected lowering: one vfmsub231ps under {%k1} writing zmm2, then a vmovaps
; of zmm2 into zmm0. The i686 run loads the i16 mask straight from the stack
; with kmovw; the x86_64 run takes it from %edi.
558 define <16 x float>@test_int_x86_avx512_mask3_vfmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
559 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
561 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
562 ; X86-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
563 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
564 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
565 ; X86-NEXT: retl # encoding: [0xc3]
567 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_512:
569 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
570 ; X64-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
571 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) - zmm2
572 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
573 ; X64-NEXT: retq # encoding: [0xc3]
574 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
575 %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %1)
576 %3 = bitcast i16 %x3 to <16 x i1>
577 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x2
; Merge-masked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 8. The select keeps %a0 in lanes whose bit in
; %mask is clear, and the expected lowering folds it into one vfmadd132pd with
; {rn-sae} under {%k1}.
581 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
582 ; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
584 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
585 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
586 ; X86-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
587 ; X86-NEXT: retl # encoding: [0xc3]
589 ; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rne:
591 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
592 ; X64-NEXT: vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
593 ; X64-NEXT: retq # encoding: [0xc3]
594 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
595 %bc = bitcast i8 %mask to <8 x i1>
596 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
597 ret <8 x double> %sel
; Merge-masked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 9. The select keeps %a0 in lanes whose bit in
; %mask is clear, and the expected lowering folds it into one vfmadd132pd with
; {rd-sae} under {%k1}.
600 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
601 ; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
603 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
604 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
605 ; X86-NEXT: vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
606 ; X86-NEXT: retl # encoding: [0xc3]
608 ; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtn:
610 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
611 ; X64-NEXT: vfmadd132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x98,0xc1]
612 ; X64-NEXT: retq # encoding: [0xc3]
613 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
614 %bc = bitcast i8 %mask to <8 x i1>
615 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
616 ret <8 x double> %sel
; Merge-masked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 10. The select keeps %a0 in lanes whose bit in
; %mask is clear, and the expected lowering folds it into one vfmadd132pd with
; {ru-sae} under {%k1}.
619 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
620 ; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
622 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
623 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
624 ; X86-NEXT: vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
625 ; X86-NEXT: retl # encoding: [0xc3]
627 ; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtp:
629 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
630 ; X64-NEXT: vfmadd132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x98,0xc1]
631 ; X64-NEXT: retq # encoding: [0xc3]
632 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
633 %bc = bitcast i8 %mask to <8 x i1>
634 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
635 ret <8 x double> %sel
; Merge-masked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 11. The select keeps %a0 in lanes whose bit in
; %mask is clear, and the expected lowering folds it into one vfmadd132pd with
; {rz-sae} under {%k1}.
638 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
639 ; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
641 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
642 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
643 ; X86-NEXT: vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
644 ; X86-NEXT: retl # encoding: [0xc3]
646 ; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_rtz:
648 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
649 ; X64-NEXT: vfmadd132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x98,0xc1]
650 ; X64-NEXT: retq # encoding: [0xc3]
651 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
652 %bc = bitcast i8 %mask to <8 x i1>
653 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
654 ret <8 x double> %sel
; Merge-masked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 4 (the "_current" case in this file's naming).
; The select keeps %a0 in lanes whose bit in %mask is clear; the expected
; vfmadd132pd under {%k1} has no embedded-rounding annotation.
657 define <8 x double> @test_mask_round_vfmadd512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
658 ; X86-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
660 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
661 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
662 ; X86-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
663 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
664 ; X86-NEXT: retl # encoding: [0xc3]
666 ; X64-LABEL: test_mask_round_vfmadd512_pd_rrb_current:
668 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
669 ; X64-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
670 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
671 ; X64-NEXT: retq # encoding: [0xc3]
672 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
673 %bc = bitcast i8 %mask to <8 x i1>
674 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %a0
675 ret <8 x double> %sel
; Unmasked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 8. Both run lines expect a single vfmadd213pd
; carrying the {rn-sae} embedded-rounding annotation.
678 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
679 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rne:
681 ; CHECK-NEXT: vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
682 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
683 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 8) nounwind
684 ret <8 x double> %res
; Unmasked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 9. Both run lines expect a single vfmadd213pd
; carrying the {rd-sae} embedded-rounding annotation.
687 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
688 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtn:
690 ; CHECK-NEXT: vfmadd213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xa8,0xc2]
691 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
692 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 9) nounwind
693 ret <8 x double> %res
; Unmasked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 10. Both run lines expect a single vfmadd213pd
; carrying the {ru-sae} embedded-rounding annotation.
696 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
697 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtp:
699 ; CHECK-NEXT: vfmadd213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xa8,0xc2]
700 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
701 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 10) nounwind
702 ret <8 x double> %res
; Unmasked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 11. Both run lines expect a single vfmadd213pd
; carrying the {rz-sae} embedded-rounding annotation.
705 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
706 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_rtz:
708 ; CHECK-NEXT: vfmadd213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xa8,0xc2]
709 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
710 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 11) nounwind
711 ret <8 x double> %res
; Unmasked 8 x double FMA through @llvm.x86.avx512.vfmadd.pd.512 with the
; rounding operand set to i32 4 (the "_current" case in this file's naming).
; The expected vfmadd213pd has no embedded-rounding annotation.
714 define <8 x double> @test_mask_round_vfmadd512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
715 ; CHECK-LABEL: test_mask_round_vfmadd512_pd_rrbz_current:
717 ; CHECK-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
718 ; CHECK-NEXT: # zmm0 = (zmm1 * zmm0) + zmm2
719 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
720 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i32 4) nounwind
721 ret <8 x double> %res
; Merge-masked 8 x double FMA with the first operand as passthrough: the
; @llvm.x86.avx512.vfmadd.pd.512 result (rounding operand i32 4) is selected
; against %x0 using the bits of %x3. Expected lowering is one vfmadd132pd
; under {%k1} that writes zmm0 in place.
724 define <8 x double>@test_int_x86_avx512_mask_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
725 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
727 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
728 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
729 ; X86-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
730 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
731 ; X86-NEXT: retl # encoding: [0xc3]
733 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_pd_512:
735 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
736 ; X64-NEXT: vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
737 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
738 ; X64-NEXT: retq # encoding: [0xc3]
739 %res = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i32 4)
740 %bc = bitcast i8 %x3 to <8 x i1>
741 %sel = select <8 x i1> %bc, <8 x double> %res, <8 x double> %x0
742 ret <8 x double> %sel
; Merge-masked 8 x double FMA with the third operand as passthrough: the
; generic @llvm.fma.v8f64 result is selected against %x2 using the bits of
; %x3. Expected lowering is one vfmadd231pd under {%k1} writing zmm2, followed
; by a vmovapd of zmm2 into zmm0.
745 define <8 x double>@test_int_x86_avx512_mask3_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
746 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
748 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
749 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
750 ; X86-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
751 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
752 ; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
753 ; X86-NEXT: retl # encoding: [0xc3]
755 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_512:
757 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
758 ; X64-NEXT: vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
759 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
760 ; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
761 ; X64-NEXT: retq # encoding: [0xc3]
762 %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
763 %2 = bitcast i8 %x3 to <8 x i1>
764 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
; Zero-masked 8 x double FMA: the generic @llvm.fma.v8f64 result is selected
; against zeroinitializer using the bits of %x3. Expected lowering is one
; vfmadd213pd with {%k1} {z} writing zmm0.
768 define <8 x double>@test_int_x86_avx512_maskz_vfmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
769 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
771 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
772 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
773 ; X86-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
774 ; X86-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
775 ; X86-NEXT: retl # encoding: [0xc3]
777 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_512:
779 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
780 ; X64-NEXT: vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
781 ; X64-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
782 ; X64-NEXT: retq # encoding: [0xc3]
783 %1 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2)
784 %2 = bitcast i8 %x3 to <8 x i1>
785 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
; Merge-masked 16 x float FMA with the first operand as passthrough: the
; @llvm.x86.avx512.vfmadd.ps.512 result (rounding operand i32 4) is selected
; against %x0 using the bits of %x3. Expected lowering is one vfmadd132ps
; under {%k1} that writes zmm0 in place.
789 define <16 x float>@test_int_x86_avx512_mask_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
790 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
792 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
793 ; X86-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
794 ; X86-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
795 ; X86-NEXT: retl # encoding: [0xc3]
797 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ps_512:
799 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
800 ; X64-NEXT: vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
801 ; X64-NEXT: # zmm0 {%k1} = (zmm0 * zmm1) + zmm2
802 ; X64-NEXT: retq # encoding: [0xc3]
803 %res = call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i32 4)
804 %bc = bitcast i16 %x3 to <16 x i1>
805 %sel = select <16 x i1> %bc, <16 x float> %res, <16 x float> %x0
806 ret <16 x float> %sel
; Merge-masked 16 x float FMA with the third operand as passthrough: the
; generic @llvm.fma.v16f32 result is selected against %x2 using the bits of
; %x3. Expected lowering is one vfmadd231ps under {%k1} writing zmm2, followed
; by a vmovaps of zmm2 into zmm0.
809 define <16 x float>@test_int_x86_avx512_mask3_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
810 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
812 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
813 ; X86-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
814 ; X86-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
815 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
816 ; X86-NEXT: retl # encoding: [0xc3]
818 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_512:
820 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
821 ; X64-NEXT: vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
822 ; X64-NEXT: # zmm2 {%k1} = (zmm0 * zmm1) + zmm2
823 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
824 ; X64-NEXT: retq # encoding: [0xc3]
825 %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
826 %2 = bitcast i16 %x3 to <16 x i1>
827 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
; Zero-masked 16 x float FMA: the generic @llvm.fma.v16f32 result is selected
; against zeroinitializer using the bits of %x3. Expected lowering is one
; vfmadd213ps with {%k1} {z} writing zmm0.
831 define <16 x float> @test_int_x86_avx512_maskz_vfmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
832 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
834 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
835 ; X86-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
836 ; X86-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
837 ; X86-NEXT: retl # encoding: [0xc3]
839 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_512:
841 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
842 ; X64-NEXT: vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
843 ; X64-NEXT: # zmm0 {%k1} {z} = (zmm1 * zmm0) + zmm2
844 ; X64-NEXT: retq # encoding: [0xc3]
845 %1 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2)
846 %2 = bitcast i16 %x3 to <16 x i1>
847 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
; Merge-masked negated multiply-subtract on 8 x double with rounding operand
; i32 8. Both %a1 and %a2 are negated (fsub from a -0.0 splat) before being
; passed to @llvm.x86.avx512.vfmadd.pd.512; the select keeps %a0 in lanes whose
; bit in %mask is clear. The expected lowering absorbs both negations into one
; vfnmsub132pd with {rn-sae} under {%k1}.
851 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
852 ; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
854 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
855 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
856 ; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
857 ; X86-NEXT: retl # encoding: [0xc3]
859 ; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rne:
861 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
862 ; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
863 ; X64-NEXT: retq # encoding: [0xc3]
864 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
865 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
866 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
867 %4 = bitcast i8 %mask to <8 x i1>
868 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Merge-masked negated multiply-subtract on 8 x double with rounding operand
; i32 9. Both %a1 and %a2 are negated (fsub from a -0.0 splat) before being
; passed to @llvm.x86.avx512.vfmadd.pd.512; the select keeps %a0 in lanes whose
; bit in %mask is clear. The expected lowering absorbs both negations into one
; vfnmsub132pd with {rd-sae} under {%k1}.
872 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
873 ; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
875 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
876 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
877 ; X86-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
878 ; X86-NEXT: retl # encoding: [0xc3]
880 ; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtn:
882 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
883 ; X64-NEXT: vfnmsub132pd {rd-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x39,0x9e,0xc1]
884 ; X64-NEXT: retq # encoding: [0xc3]
885 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
886 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
887 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
888 %4 = bitcast i8 %mask to <8 x i1>
889 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Merge-masked negated multiply-subtract on 8 x double with rounding operand
; i32 10. Both %a1 and %a2 are negated (fsub from a -0.0 splat) before being
; passed to @llvm.x86.avx512.vfmadd.pd.512; the select keeps %a0 in lanes whose
; bit in %mask is clear. The expected lowering absorbs both negations into one
; vfnmsub132pd with {ru-sae} under {%k1}.
893 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
894 ; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
896 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
897 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
898 ; X86-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
899 ; X86-NEXT: retl # encoding: [0xc3]
901 ; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtp:
903 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
904 ; X64-NEXT: vfnmsub132pd {ru-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x59,0x9e,0xc1]
905 ; X64-NEXT: retq # encoding: [0xc3]
906 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
907 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
908 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
909 %4 = bitcast i8 %mask to <8 x i1>
910 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Merge-masked negated multiply-subtract on 8 x double with rounding operand
; i32 11. Both %a1 and %a2 are negated (fsub from a -0.0 splat) before being
; passed to @llvm.x86.avx512.vfmadd.pd.512; the select keeps %a0 in lanes whose
; bit in %mask is clear. The expected lowering absorbs both negations into one
; vfnmsub132pd with {rz-sae} under {%k1}.
914 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
915 ; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
917 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
918 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
919 ; X86-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
920 ; X86-NEXT: retl # encoding: [0xc3]
922 ; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_rtz:
924 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
925 ; X64-NEXT: vfnmsub132pd {rz-sae}, %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x79,0x9e,0xc1]
926 ; X64-NEXT: retq # encoding: [0xc3]
927 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
928 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
929 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
930 %4 = bitcast i8 %mask to <8 x i1>
931 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Merge-masked negated multiply-subtract on 8 x double without a rounding
; operand: this variant calls the generic @llvm.fma.v8f64 rather than the
; target intrinsic. %a1 and %a2 are negated (fsub from a -0.0 splat) and the
; select keeps %a0 in lanes whose bit in %mask is clear. Expected lowering is
; one vfnmsub132pd under {%k1} with no embedded-rounding annotation.
935 define <8 x double> @test_mask_round_vfnmsub512_pd_rrb_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2, i8 %mask) {
936 ; X86-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
938 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
939 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
940 ; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
941 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
942 ; X86-NEXT: retl # encoding: [0xc3]
944 ; X64-LABEL: test_mask_round_vfnmsub512_pd_rrb_current:
946 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
947 ; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
948 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
949 ; X64-NEXT: retq # encoding: [0xc3]
950 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
951 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
952 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
953 %4 = bitcast i8 %mask to <8 x i1>
954 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %a0
; Unmasked negated multiply-subtract on 8 x double with rounding operand
; i32 8. %a1 and %a2 are negated (fsub from a -0.0 splat) and passed to
; @llvm.x86.avx512.vfmadd.pd.512; both run lines expect a single vfnmsub213pd
; carrying {rn-sae}.
958 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rne(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
959 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rne:
961 ; CHECK-NEXT: vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
962 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
963 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
964 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
965 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 8)
; Unmasked negated multiply-subtract on 8 x double with rounding operand
; i32 9. %a1 and %a2 are negated (fsub from a -0.0 splat) and passed to
; @llvm.x86.avx512.vfmadd.pd.512; both run lines expect a single vfnmsub213pd
; carrying {rd-sae}.
969 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtn(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
970 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtn:
972 ; CHECK-NEXT: vfnmsub213pd {rd-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x38,0xae,0xc2]
973 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
974 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
975 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
976 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 9)
; Unmasked negated multiply-subtract on 8 x double with rounding operand
; i32 10. %a1 and %a2 are negated (fsub from a -0.0 splat) and passed to
; @llvm.x86.avx512.vfmadd.pd.512; both run lines expect a single vfnmsub213pd
; carrying {ru-sae}.
980 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtp(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
981 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtp:
983 ; CHECK-NEXT: vfnmsub213pd {ru-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x58,0xae,0xc2]
984 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
985 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
986 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
987 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 10)
; Unmasked negated multiply-subtract on 8 x double with rounding operand
; i32 11. %a1 and %a2 are negated (fsub from a -0.0 splat) and passed to
; @llvm.x86.avx512.vfmadd.pd.512; both run lines expect a single vfnmsub213pd
; carrying {rz-sae}.
991 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_rtz(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
992 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_rtz:
994 ; CHECK-NEXT: vfnmsub213pd {rz-sae}, %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x78,0xae,0xc2]
995 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
996 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
997 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
998 %3 = call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %a0, <8 x double> %1, <8 x double> %2, i32 11)
; Unmasked negated multiply-subtract on 8 x double without a rounding operand:
; this variant calls the generic @llvm.fma.v8f64 with %a1 and %a2 negated
; (fsub from a -0.0 splat). Both run lines expect a single vfnmsub213pd with no
; embedded-rounding annotation.
1002 define <8 x double> @test_mask_round_vfnmsub512_pd_rrbz_current(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
1003 ; CHECK-LABEL: test_mask_round_vfnmsub512_pd_rrbz_current:
1005 ; CHECK-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
1006 ; CHECK-NEXT: # zmm0 = -(zmm1 * zmm0) - zmm2
1007 ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
1008 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a1
1009 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %a2
1010 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %a0, <8 x double> %1, <8 x double> %2)
; Merge-masked negated multiply-subtract on 8 x double with the first operand
; as passthrough. %x1 and %x2 are negated (fsub from a -0.0 splat) and passed
; to @llvm.fma.v8f64 with %x0; the select keeps %x0 in lanes whose bit in %x3
; is clear. Expected lowering is one vfnmsub132pd under {%k1} writing zmm0.
1014 define <8 x double>@test_int_x86_avx512_mask_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1015 ; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
1017 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1018 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1019 ; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
1020 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1021 ; X86-NEXT: retl # encoding: [0xc3]
1023 ; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_512:
1025 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1026 ; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
1027 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1028 ; X64-NEXT: retq # encoding: [0xc3]
1029 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
1030 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
1031 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %2)
1032 %4 = bitcast i8 %x3 to <8 x i1>
1033 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x0
; Merge-masked negated multiply-subtract on 8 x double with the third operand
; as passthrough. Here the negations are applied to %x0 and %x2 (fsub from a
; -0.0 splat), and @llvm.fma.v8f64 is called as fma(-%x0, %x1, -%x2); the
; select keeps %x2 in lanes whose bit in %x3 is clear. Expected lowering is one
; vfnmsub231pd under {%k1} writing zmm2, followed by a vmovapd into zmm0.
1037 define <8 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1038 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
1040 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1041 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1042 ; X86-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
1043 ; X86-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1044 ; X86-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
1045 ; X86-NEXT: retl # encoding: [0xc3]
1047 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_512:
1049 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1050 ; X64-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
1051 ; X64-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1052 ; X64-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
1053 ; X64-NEXT: retq # encoding: [0xc3]
1054 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x0
1055 %2 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x2
1056 %3 = call <8 x double> @llvm.fma.v8f64(<8 x double> %1, <8 x double> %x1, <8 x double> %2)
1057 %4 = bitcast i8 %x3 to <8 x i1>
1058 %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %x2
; Masked negated-multiply-subtract on <16 x float>, "mask" flavour: lanes
; whose mask bit is clear keep the first operand (%x0).
; The IR computes fma(%x0, -%x1, -%x2), i.e. -(%x0 * %x1) - %x2, and the
; expected assembly is one masked vfnmsub132ps writing zmm0 in place, with no
; extra register move.
1062 define <16 x float>@test_int_x86_avx512_mask_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1063 ; X86-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
1065 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1066 ; X86-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
1067 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1068 ; X86-NEXT: retl # encoding: [0xc3]
1070 ; X64-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_512:
1072 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1073 ; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
1074 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) - zmm2
1075 ; X64-NEXT: retq # encoding: [0xc3]
; Subtracting from a splat of -0.0 negates %x1 (second multiplicand) and %x2
; (addend) before they are fed to the generic fma intrinsic.
1076 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
1077 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
1078 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %2)
; Reinterpret the i16 mask as one i1 per lane; unselected lanes take %x0.
1079 %4 = bitcast i16 %x3 to <16 x i1>
1080 %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x0
; Masked negated-multiply-subtract on <16 x float>, "mask3" flavour: lanes
; whose mask bit is clear keep the third operand (%x2).
; The IR computes fma(-%x0, %x1, -%x2), i.e. -(%x0 * %x1) - %x2, and the
; expected assembly is one masked vfnmsub231ps that writes zmm2, followed by
; a vmovaps copying zmm2 into zmm0.
1084 define <16 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1085 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
1087 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1088 ; X86-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
1089 ; X86-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1090 ; X86-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1091 ; X86-NEXT: retl # encoding: [0xc3]
1093 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_512:
1095 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1096 ; X64-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
1097 ; X64-NEXT: # zmm2 {%k1} = -(zmm0 * zmm1) - zmm2
1098 ; X64-NEXT: vmovaps %zmm2, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
1099 ; X64-NEXT: retq # encoding: [0xc3]
; Subtracting from a splat of -0.0 negates %x0 (first multiplicand) and %x2
; (addend) before they are fed to the generic fma intrinsic.
1100 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x0
1101 %2 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x2
1102 %3 = call <16 x float> @llvm.fma.v16f32(<16 x float> %1, <16 x float> %x1, <16 x float> %2)
; Reinterpret the i16 mask as one i1 per lane; unselected lanes take the
; un-negated %x2.
1103 %4 = bitcast i16 %x3 to <16 x i1>
1104 %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %x2
; Masked negated-multiply-add on <8 x double>, "mask" flavour: lanes whose
; mask bit is clear keep the first operand (%x0).
; The IR computes fma(%x0, -%x1, %x2), i.e. -(%x0 * %x1) + %x2, and the
; expected assembly is one masked vfnmadd132pd writing zmm0 in place.
; In the i686 expectations the i8 mask is first zero-extended from the stack
; with movzbl and then moved into k1.
1108 define <8 x double>@test_int_x86_avx512_mask_vfnmadd_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3){
1109 ; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
1111 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
1112 ; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
1113 ; X86-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
1114 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1115 ; X86-NEXT: retl # encoding: [0xc3]
1117 ; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_512:
1119 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1120 ; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
1121 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1122 ; X64-NEXT: retq # encoding: [0xc3]
; Subtracting from a splat of -0.0 negates only %x1 (second multiplicand);
; the addend %x2 is passed through unchanged.
1123 %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x1
1124 %2 = call <8 x double> @llvm.fma.v8f64(<8 x double> %x0, <8 x double> %1, <8 x double> %x2)
; Reinterpret the i8 mask as one i1 per lane; unselected lanes take %x0.
1125 %3 = bitcast i8 %x3 to <8 x i1>
1126 %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %x0
; Masked negated-multiply-add on <16 x float>, "mask" flavour: lanes whose
; mask bit is clear keep the first operand (%x0).
; The IR computes fma(%x0, -%x1, %x2), i.e. -(%x0 * %x1) + %x2, and the
; expected assembly is one masked vfnmadd132ps writing zmm0 in place.
1130 define <16 x float>@test_int_x86_avx512_mask_vfnmadd_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3){
1131 ; X86-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
1133 ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
1134 ; X86-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
1135 ; X86-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1136 ; X86-NEXT: retl # encoding: [0xc3]
1138 ; X64-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_512:
1140 ; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
1141 ; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
1142 ; X64-NEXT: # zmm0 {%k1} = -(zmm0 * zmm1) + zmm2
1143 ; X64-NEXT: retq # encoding: [0xc3]
; Subtracting from a splat of -0.0 negates only %x1 (second multiplicand);
; the addend %x2 is passed through unchanged.
1144 %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x1
1145 %2 = call <16 x float> @llvm.fma.v16f32(<16 x float> %x0, <16 x float> %1, <16 x float> %x2)
; Reinterpret the i16 mask as one i1 per lane; unselected lanes take %x0.
1146 %3 = bitcast i16 %x3 to <16 x i1>
1147 %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %x0
1151 ; This test case used to crash due to combineFMA not bitcasting results of isFNEG.
; Reduced reproducer: a scalar element is extracted from a negated
; <4 x float> vector and used as the addend of the scalar AVX-512 FMA
; intrinsic. The expected assembly folds that negation into the opcode,
; giving vfmsub213ss with the {rd-sae} modifier.
; The pointer and the two multiplicands are undef, which is why the expected
; code loads from an uninitialised address register and multiplies xmm0 by
; itself.
1152 define <4 x float> @foo() {
1154 ; X86: # %bb.0: # %entry
1155 ; X86-NEXT: vmovss (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
1156 ; X86-NEXT: # xmm0 = mem[0],zero,zero,zero
1157 ; X86-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
1158 ; X86-NEXT: retl # encoding: [0xc3]
1161 ; X64: # %bb.0: # %entry
1162 ; X64-NEXT: vmovss (%rax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x00]
1163 ; X64-NEXT: # xmm0 = mem[0],zero,zero,zero
1164 ; X64-NEXT: vfmsub213ss {rd-sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x38,0xab,0xc0]
1165 ; X64-NEXT: retq # encoding: [0xc3]
; Vector negation (subtract from a -0.0 splat) followed by extraction of
; lane 0: the negate is on the vector type while the FMA consumes a scalar.
1167 %0 = load <4 x float>, <4 x float>* undef, align 16
1168 %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
1169 %1 = extractelement <4 x float> %sub, i64 0
; The trailing i32 9 operand presumably selects the rounding mode printed as
; {rd-sae} in the expected assembly -- confirm against the intrinsic's
; definition.
1170 %2 = call float @llvm.x86.avx512.vfmadd.f32(float undef, float undef, float %1, i32 9)
; The select condition is a constant expression: lane 0 of the i8 value 1
; reinterpreted as <8 x i1>.
1171 %3 = select i1 extractelement (<8 x i1> bitcast (<1 x i8> <i8 1> to <8 x i1>), i64 0), float %2, float undef
1172 %4 = insertelement <4 x float> undef, float %3, i64 0
; Scalar AVX-512 FMA intrinsic called by @foo; it takes three float operands
; plus a trailing i32 operand.
1176 ; Function Attrs: nounwind readnone
1177 declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32)
; Generic fused multiply-add intrinsics at 512-bit vector width, called by
; the masked vfnmadd/vfnmsub tests in this file.
1179 declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>)
1180 declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>)