; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c

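; Note: the i32 8 rounding operand passed to the *.512 intrinsics below is
; _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC, which selects the {rn-sae}
; embedded-rounding form checked in the assembly.
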
define <8 x double> @test_mm512_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmadd_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

declare <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) #1

define <8 x double> @test_mm512_mask_fmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x98,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

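; The _mask/_mask3/_maskz variants below apply the write-mask with an IR
; select whose false arm is the passthrough: %__A for _mask, %__C for _mask3
; (hence the trailing vmovapd/vmovaps back into %zmm0), and zeroinitializer
; for _maskz (the {z} forms).
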
define <8 x double> @test_mm512_mask3_fmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb8,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb8,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa8,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa8,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

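; The fmsub/fnmadd/fnmsub tests below express negation of an operand as
; 'fsub <-0.0, ...>, %x'; the backend is expected to fold that negation into
; the FMA itself, selecting vfmsub/vfnmadd/vfnmsub rather than emitting a
; separate negate.
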
define <8 x double> @test_mm512_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xaa,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xaa,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask3_fnmadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbc,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbc,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fnmadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xac,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xac,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_maskz_fnmsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xae,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xae,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmadd_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa8,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X86-NEXT:    ## zmm0 = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x98,0xc1]
; X64-NEXT:    ## zmm0 = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X86-NEXT:    ## zmm2 = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb8,0xd1]
; X64-NEXT:    ## zmm2 = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X86-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa8,0xc2]
; X64-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsub_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xaa,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9a,0xc1]
; X86-NEXT:    ## zmm0 = (zmm0 * zmm1) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9a,0xc1]
; X64-NEXT:    ## zmm0 = (zmm0 * zmm1) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xaa,0xc2]
; X86-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xaa,0xc2]
; X64-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xac,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask3_fnmadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbc,0xd1]
; X86-NEXT:    ## zmm2 = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbc,0xd1]
; X64-NEXT:    ## zmm2 = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fnmadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xac,0xc2]
; X86-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xac,0xc2]
; X64-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xae,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #10
  ret <8 x double> %0
}

define <8 x double> @test_mm512_maskz_fnmsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xae,0xc2]
; X86-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xae,0xc2]
; X64-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %sub1.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

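; The <16 x float> tests mirror the <8 x double> ones above; the only
; mask-handling difference is that the i16 mask is wide enough for kmovw to
; load it directly from the stack on X86, so no movzbl is needed.
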
define <16 x float> @test_mm512_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmadd_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

declare <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) #1

define <16 x float> @test_mm512_mask_fmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x98,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb8,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb8,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa8,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa8,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsub_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xaa,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xaa,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xaa,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xac,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask3_fnmadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbc,0xd1]
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbc,0xd1]
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fnmadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xac,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xac,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_round_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xae,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_maskz_fnmsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xae,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_round_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xae,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %sub1, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmadd_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa8,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X86-NEXT:    ## zmm0 = (zmm0 * zmm1) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x98,0xc1]
; X64-NEXT:    ## zmm0 = (zmm0 * zmm1) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X86-NEXT:    ## zmm2 = (zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb8,0xd1]
; X64-NEXT:    ## zmm2 = (zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X86-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa8,0xc2]
; X64-NEXT:    ## zmm0 = (zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsub_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xaa,0xc2]
; CHECK-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9a,0xc1]
; X86-NEXT:    ## zmm0 = (zmm0 * zmm1) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9a,0xc1]
; X64-NEXT:    ## zmm0 = (zmm0 * zmm1) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xaa,0xc2]
; X86-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xaa,0xc2]
; X64-NEXT:    ## zmm0 = (zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmadd_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xac,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask3_fnmadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbc,0xd1]
; X86-NEXT:    ## zmm2 = -(zmm0 * zmm1) + zmm2
; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbc,0xd1]
; X64-NEXT:    ## zmm2 = -(zmm0 * zmm1) + zmm2
; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fnmadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmadd_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xac,0xc2]
; X86-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmadd_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xac,0xc2]
; X64-NEXT:    ## zmm0 = -(zmm1 * zmm0) + zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fnmsub_ps:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xae,0xc2]
; CHECK-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #10
  ret <16 x float> %0
}

define <16 x float> @test_mm512_maskz_fnmsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fnmsub_ps:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xae,0xc2]
; X86-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fnmsub_ps:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xae,0xc2]
; X64-NEXT:    ## zmm0 = -(zmm1 * zmm0) - zmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %sub1.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

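; vfmaddsub alternates the final operation per lane (subtracting %__C in
; even-indexed elements and adding it in odd-indexed ones); vfmsubadd is
; exercised as the same intrinsic with %__C negated via fsub by -0.0.
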
define <8 x double> @test_mm512_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_round_pd:
; CHECK:       ## %bb.0: ## %entry
; CHECK-NEXT:    vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa6,0xc2]
; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  ret <8 x double> %0
}

declare <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double>, <8 x double>, <8 x double>, i32) #1

define <8 x double> @test_mm512_mask_fmaddsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x96,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x96,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmaddsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_pd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb6,0xd1]
; X86-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_round_pd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb6,0xd1]
; X64-NEXT:    vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmaddsub_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa6,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa6,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

define <8 x double> @test_mm512_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_round_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x18,0xa7,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  ret <8 x double> %0
}

define <8 x double> @test_mm512_mask_fmsubadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x97,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x97,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_maskz_fmsubadd_round_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa7,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213pd {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x99,0xa7,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> zeroinitializer
  ret <8 x double> %2
}

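; The non-rounding fmaddsub/fmsubadd tests below use the clang-generated pattern: two
; llvm.fma calls (one with %__C negated via an fsub from -0.0) blended by a shufflevector,
; instead of the x86-specific @llvm.x86.avx512.vfmaddsub intrinsic used by the rounding variants.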
define <8 x double> @test_mm512_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa6,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %3
}

define <8 x double> @test_mm512_mask_fmaddsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X86-NEXT: ## zmm0 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x96,0xc1]
; X64-NEXT: ## zmm0 = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__A
  ret <8 x double> %5
}

define <8 x double> @test_mm512_mask3_fmaddsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb6,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> %__C
  ret <8 x double> %5
}

define <8 x double> @test_mm512_maskz_fmaddsub_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X86-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa6,0xc2]
; X64-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %2 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %1) #10
  %3 = shufflevector <8 x double> %2, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x double> %3, <8 x double> zeroinitializer
  ret <8 x double> %5
}

define <8 x double> @test_mm512_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_pd:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0xf5,0x48,0xa7,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fmsubadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x97,0xc1]
; X86-NEXT: ## zmm0 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x97,0xc1]
; X64-NEXT: ## zmm0 = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__A
  ret <8 x double> %4
}

define <8 x double> @test_mm512_maskz_fmsubadd_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa7,0xc2]
; X86-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213pd %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0xa7,0xc2]
; X64-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
  ret <8 x double> %4
}

define <16 x float> @test_mm512_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_round_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa6,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  ret <16 x float> %0
}

declare <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float>, <16 x float>, <16 x float>, i32) #1

define <16 x float> @test_mm512_mask_fmaddsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x96,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x96,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmaddsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb6,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb6,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmaddsub_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa6,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa6,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_round_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa7,0xc2]
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  ret <16 x float> %0
}

define <16 x float> @test_mm512_mask_fmsubadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x97,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x97,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_maskz_fmsubadd_round_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa7,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213ps {rn-sae}, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x99,0xa7,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> zeroinitializer
  ret <16 x float> %2
}

define <16 x float> @test_mm512_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmaddsub_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa6,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %3
}

define <16 x float> @test_mm512_mask_fmaddsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X86-NEXT: ## zmm0 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x96,0xc1]
; X64-NEXT: ## zmm0 = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__A
  ret <16 x float> %5
}

define <16 x float> @test_mm512_mask3_fmaddsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) +/- zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb6,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) +/- zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> %__C
  ret <16 x float> %5
}

define <16 x float> @test_mm512_maskz_fmaddsub_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmaddsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X86-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmaddsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa6,0xc2]
; X64-NEXT: ## zmm0 = (zmm1 * zmm0) +/- zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %2 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %1) #10
  %3 = shufflevector <16 x float> %2, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x float> %3, <16 x float> zeroinitializer
  ret <16 x float> %5
}

define <16 x float> @test_mm512_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; CHECK-LABEL: test_mm512_fmsubadd_ps:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0xa7,0xc2]
; CHECK-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; CHECK-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask_fmsubadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x97,0xc1]
; X86-NEXT: ## zmm0 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x97,0xc1]
; X64-NEXT: ## zmm0 = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__A
  ret <16 x float> %4
}

define <16 x float> @test_mm512_maskz_fmsubadd_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_maskz_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa7,0xc2]
; X86-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd213ps %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0xa7,0xc2]
; X64-NEXT: ## zmm0 = (zmm1 * zmm0) -/+ zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
  ret <16 x float> %4
}

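; The mask3 variants below select %__C as the merge source, so llc is expected to pick the
; 231 instruction form (accumulating into zmm2) and then copy zmm2 back into zmm0 for the return.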
define <8 x double> @test_mm512_mask3_fmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xba,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xba,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xba,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) - zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask3_fmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xba,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xba,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xba,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) - zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <8 x double> @test_mm512_mask3_fmsubadd_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb7,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xb7,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmaddsub.pd.512(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fmsubadd_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xb7,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %sub.i) #10
  %1 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C) #10
  %2 = shufflevector <8 x double> %1, <8 x double> %0, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__C
  ret <8 x double> %4
}

define <16 x float> @test_mm512_mask3_fmsubadd_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb7,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xb7,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmaddsub.ps.512(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fmsubadd_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fmsubadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X86-NEXT: ## zmm2 = (zmm0 * zmm1) -/+ zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fmsubadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsubadd231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xb7,0xd1]
; X64-NEXT: ## zmm2 = (zmm0 * zmm1) -/+ zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %sub.i) #10
  %1 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C) #10
  %2 = shufflevector <16 x float> %1, <16 x float> %0, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__C
  ret <16 x float> %4
}

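; The fnmadd/fnmsub tests below negate one or both fma operands with an fsub from -0.0;
; llc is expected to fold those negations into the vfnmadd/vfnmsub instruction forms.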
define <8 x double> @test_mm512_mask_fnmadd_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9c,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %sub, <8 x double> %__B, <8 x double> %__C, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fnmadd_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X86-NEXT: ## zmm0 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9c,0xc1]
; X64-NEXT: ## zmm0 = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %sub.i, <8 x double> %__B, <8 x double> %__C) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask_fnmadd_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9c,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9c,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %sub, <16 x float> %__B, <16 x float> %__C, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask_fnmadd_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmadd_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X86-NEXT: ## zmm0 = -(zmm0 * zmm1) + zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmadd_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9c,0xc1]
; X64-NEXT: ## zmm0 = -(zmm0 * zmm1) + zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %sub.i, <16 x float> %__B, <16 x float> %__C) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <8 x double> @test_mm512_mask_fnmsub_round_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x19,0x9e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fnmsub_round_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbe,0xd1]
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_round_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x19,0xbe,0xd1]
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.x86.avx512.vfmadd.pd.512(<8 x double> %__A, <8 x double> %sub, <8 x double> %sub1, i32 8)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask_fnmsub_pd(<8 x double> %__A, i8 zeroext %__U, <8 x double> %__B, <8 x double> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X86-NEXT: ## zmm0 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132pd %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xed,0x49,0x9e,0xc1]
; X64-NEXT: ## zmm0 = -(zmm0 * zmm1) - zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub2.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub2.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__A
  ret <8 x double> %2
}

define <8 x double> @test_mm512_mask3_fnmsub_pd(<8 x double> %__A, <8 x double> %__B, <8 x double> %__C, i8 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_pd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X86-NEXT: ## zmm2 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_pd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231pd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0xbe,0xd1]
; X64-NEXT: ## zmm2 = -(zmm0 * zmm1) - zmm2
; X64-NEXT: vmovapd %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__B
  %sub2.i = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <8 x double> @llvm.fma.v8f64(<8 x double> %__A, <8 x double> %sub.i, <8 x double> %sub2.i) #10
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x double> %0, <8 x double> %__C
  ret <8 x double> %2
}

define <16 x float> @test_mm512_mask_fnmsub_round_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9e,0xc1]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps {rn-sae}, %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x19,0x9e,0xc1]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fnmsub_round_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_round_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbe,0xd1]
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_round_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0xbe,0xd1]
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1 = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.x86.avx512.vfmadd.ps.512(<16 x float> %__A, <16 x float> %sub, <16 x float> %sub1, i32 8)
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask_fnmsub_ps(<16 x float> %__A, i16 zeroext %__U, <16 x float> %__B, <16 x float> %__C) {
; X86-LABEL: test_mm512_mask_fnmsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X86-NEXT: ## zmm0 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_fnmsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub132ps %zmm1, %zmm2, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x6d,0x49,0x9e,0xc1]
; X64-NEXT: ## zmm0 = -(zmm0 * zmm1) - zmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__A
  ret <16 x float> %2
}

define <16 x float> @test_mm512_mask3_fnmsub_ps(<16 x float> %__A, <16 x float> %__B, <16 x float> %__C, i16 zeroext %__U) {
; X86-LABEL: test_mm512_mask3_fnmsub_ps:
; X86: ## %bb.0: ## %entry
; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
; X86-NEXT: ## zmm2 = -(zmm0 * zmm1) - zmm2
; X86-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask3_fnmsub_ps:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0xbe,0xd1]
; X64-NEXT: ## zmm2 = -(zmm0 * zmm1) - zmm2
; X64-NEXT: vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1.i = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <16 x float> @llvm.fma.v16f32(<16 x float> %__A, <16 x float> %sub.i, <16 x float> %sub1.i) #10
  %1 = bitcast i16 %__U to <16 x i1>
  %2 = select <16 x i1> %1, <16 x float> %0, <16 x float> %__C
  ret <16 x float> %2
}
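
; NOTE: The scalar ss/sd tests below follow the IR clang generates for the
; masked scalar FMA intrinsics: extract element 0 of each operand, negate
; operands with 'fsub -0.0, x' where the fms/fnm* variant calls for it, run
; llvm.fma.f32/f64, then blend with the passthru on bit 0 of the mask and
; reinsert the result into element 0.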
define <4 x float> @test_mm_mask_fmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %1 = extractelement <4 x float> %__A, i64 0
  %2 = extractelement <4 x float> %__B, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
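
; NOTE: The *_round_* ss/sd tests pass i32 4 (_MM_FROUND_CUR_DIRECTION) to the
; masked vfmadd intrinsics, so they should select the same instructions and
; encodings as the non-round variants.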
define <4 x float> @test_mm_mask_fmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %__A, <4 x float> %__B, i8 %__U, i32 4)
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1

define <4 x float> @test_mm_maskz_fmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %2 = extractelement <4 x float> %__C, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_fmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__C, i8 %__U, i32 4)
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1

define <4 x float> @test_mm_mask3_fmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %1 = extractelement <4 x float> %__X, i64 0
  %2 = extractelement <4 x float> %__Y, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__Y, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask3_fmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 %__U, i32 4)
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1

define <4 x float> @test_mm_mask_fmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %1 = extractelement <4 x float> %__A, i64 0
  %.rhs.i = extractelement <4 x float> %__B, i64 0
  %2 = fsub float -0.000000e+00, %.rhs.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask_fmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %__A, <4 x float> %sub, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_maskz_fmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %.rhs.i = extractelement <4 x float> %__C, i64 0
  %2 = fsub float -0.000000e+00, %.rhs.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_fmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %sub, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_mask3_fmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %1 = extractelement <4 x float> %__X, i64 0
  %.rhs.i = extractelement <4 x float> %__Y, i64 0
  %2 = fsub float -0.000000e+00, %.rhs.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__Y, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask3_fmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 %__U, i32 4)
  ret <4 x float> %0
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) #1

define <4 x float> @test_mm_mask_fnmadd_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %.rhs.i = extractelement <4 x float> %__A, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %2 = extractelement <4 x float> %__B, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask_fnmadd_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xad,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__B, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_maskz_fnmadd_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %.rhs.i = extractelement <4 x float> %__B, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %2 = extractelement <4 x float> %__C, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_fnmadd_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xad,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %sub, <4 x float> %__C, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_mask3_fnmadd_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) + xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %.rhs.i = extractelement <4 x float> %__X, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %2 = extractelement <4 x float> %__Y, i64 0
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <4 x float> %__Y, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %3
  %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask3_fnmadd_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) + xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbd,0xd1]
; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) + xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__X
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__Y, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_mask_fnmsub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %.rhs.i = extractelement <4 x float> %__A, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <4 x float> %__B, i64 0
  %2 = fsub float -0.000000e+00, %.rhs7.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext2.i, float %3
  %vecins.i = insertelement <4 x float> %__W, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask_fnmsub_round_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xaf,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__A
  %sub1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %0 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %sub1, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_maskz_fnmsub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %.rhs.i = extractelement <4 x float> %__B, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %.rhs5.i = extractelement <4 x float> %__C, i64 0
  %2 = fsub float -0.000000e+00, %.rhs5.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %3
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_maskz_fnmsub_round_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B, <4 x float> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xaf,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__B
  %sub1 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__C
  %0 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %__A, <4 x float> %sub, <4 x float> %sub1, i8 %__U, i32 4)
  ret <4 x float> %0
}

define <4 x float> @test_mm_mask3_fnmsub_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__W, i64 0
  %.rhs.i = extractelement <4 x float> %__X, i64 0
  %1 = fsub float -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <4 x float> %__Y, i64 0
  %2 = fsub float -0.000000e+00, %.rhs7.i
  %3 = tail call float @llvm.fma.f32(float %0, float %1, float %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <4 x float> %__Y, i32 0
  %cond.i = select i1 %tobool.i, float %vecext2.i, float %3
  %vecins.i = insertelement <4 x float> %__Y, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}

define <4 x float> @test_mm_mask3_fnmsub_round_ss(<4 x float> %__W, <4 x float> %__X, <4 x float> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_ss:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X86-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_round_ss:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmsub231ss %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xd1]
; X64-NEXT: ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT: vmovaps %xmm2, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %__X
  %0 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %__W, <4 x float> %sub, <4 x float> %__Y, i8 %__U, i32 4)
  ret <4 x float> %0
}
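
; NOTE: The <2 x double> sd tests below mirror the ss tests above, using
; llvm.fma.f64 and the corresponding masked sd intrinsics.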
define <2 x double> @test_mm_mask_fmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmadd_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %1 = extractelement <2 x double> %__A, i64 0
  %2 = extractelement <2 x double> %__B, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask_fmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmadd_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %__A, <2 x double> %__B, i8 %__U, i32 4)
  ret <2 x double> %0
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1

define <2 x double> @test_mm_maskz_fmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmadd_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %2 = extractelement <2 x double> %__C, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_fmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmadd_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__C, i8 %__U, i32 4)
  ret <2 x double> %0
}

declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1

define <2 x double> @test_mm_mask3_fmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmadd_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %1 = extractelement <2 x double> %__X, i64 0
  %2 = extractelement <2 x double> %__Y, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask3_fmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmadd_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmadd_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1

define <2 x double> @test_mm_mask_fmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmsub_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %1 = extractelement <2 x double> %__A, i64 0
  %.rhs.i = extractelement <2 x double> %__B, i64 0
  %2 = fsub double -0.000000e+00, %.rhs.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask_fmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fmsub_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %__A, <2 x double> %sub, i8 %__U, i32 4)
  ret <2 x double> %0
}

define <2 x double> @test_mm_maskz_fmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmsub_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %.rhs.i = extractelement <2 x double> %__C, i64 0
  %2 = fsub double -0.000000e+00, %.rhs.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_maskz_fmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X86-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fmsub_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xab,0xc2]
; X64-NEXT: ## xmm0 = (xmm1 * xmm0) - xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %sub, i8 %__U, i32 4)
  ret <2 x double> %0
}

define <2 x double> @test_mm_mask3_fmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmsub_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %1 = extractelement <2 x double> %__X, i64 0
  %.rhs.i = extractelement <2 x double> %__Y, i64 0
  %2 = fsub double -0.000000e+00, %.rhs.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask3_fmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fmsub_round_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X86-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fmsub_round_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xd1]
; X64-NEXT: ## xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT: vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}

declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) #1

define <2 x double> @test_mm_mask_fnmadd_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_sd:
; X86: ## %bb.0: ## %entry
; X86-NEXT: movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X86-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT: retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmadd_sd:
; X64: ## %bb.0: ## %entry
; X64-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X64-NEXT: ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT: retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__A, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %__B, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

define <2 x double> @test_mm_mask_fnmadd_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmadd_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmadd_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xad,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__A
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__B, i8 %__U, i32 4)
  ret <2 x double> %0
}
define <2 x double> @test_mm_maskz_fnmadd_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmadd_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %.rhs.i = extractelement <2 x double> %__B, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %__C, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_fnmadd_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmadd_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmadd_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xad,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) + xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %sub, <2 x double> %__C, i8 %__U, i32 4)
  ret <2 x double> %0
}
define <2 x double> @test_mm_mask3_fnmadd_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT:    ## xmm2 = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X64-NEXT:    ## xmm2 = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__X, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %2 = extractelement <2 x double> %__Y, i64 0
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext1.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_mask3_fnmadd_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmadd_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X86-NEXT:    ## xmm2 = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmadd_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbd,0xd1]
; X64-NEXT:    ## xmm2 = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__X
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}

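; The FNMSUB tests below negate both the multiplicand and the addend in IR, and
; the backend folds the result into a single masked vfnmsub instruction.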
define <2 x double> @test_mm_mask_fnmsub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__A, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <2 x double> %__B, i64 0
  %2 = fsub double -0.000000e+00, %.rhs7.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext2.i, double %3
  %vecins.i = insertelement <2 x double> %__W, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_mask_fnmsub_round_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xaf,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__A
  %sub1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %sub1, i8 %__U, i32 4)
  ret <2 x double> %0
}
define <2 x double> @test_mm_maskz_fnmsub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %.rhs.i = extractelement <2 x double> %__B, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs5.i = extractelement <2 x double> %__C, i64 0
  %2 = fsub double -0.000000e+00, %.rhs5.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %3
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_fnmsub_round_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B, <2 x double> %__C) {
; X86-LABEL: test_mm_maskz_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X86-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xaf,0xc2]
; X64-NEXT:    ## xmm0 = -(xmm1 * xmm0) - xmm2
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__B
  %sub1 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__C
  %0 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %__A, <2 x double> %sub, <2 x double> %sub1, i8 %__U, i32 4)
  ret <2 x double> %0
}
define <2 x double> @test_mm_mask3_fnmsub_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT:    ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X64-NEXT:    ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__W, i64 0
  %.rhs.i = extractelement <2 x double> %__X, i64 0
  %1 = fsub double -0.000000e+00, %.rhs.i
  %.rhs7.i = extractelement <2 x double> %__Y, i64 0
  %2 = fsub double -0.000000e+00, %.rhs7.i
  %3 = tail call double @llvm.fma.f64(double %0, double %1, double %2) #10
  %4 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %4, 0
  %vecext2.i = extractelement <2 x double> %__Y, i32 0
  %cond.i = select i1 %tobool.i, double %vecext2.i, double %3
  %vecins.i = insertelement <2 x double> %__Y, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_mask3_fnmsub_round_sd(<2 x double> %__W, <2 x double> %__X, <2 x double> %__Y, i8 zeroext %__U) {
; X86-LABEL: test_mm_mask3_fnmsub_round_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X86-NEXT:    ## xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask3_fnmsub_round_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xd1]
; X64-NEXT:    ## xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 ## encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %sub = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %__X
  %0 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %__W, <2 x double> %sub, <2 x double> %__Y, i8 %__U, i32 4)
  ret <2 x double> %0
}

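; Masked scalar arithmetic: each test below performs the op on element 0 of the
; source vectors, then blends the result with the passthru (or zero, for maskz)
; under bit 0 of the mask, matching the masked vaddss/vsubsd/etc. instructions.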
define <4 x float> @test_mm_mask_add_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_add_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_add_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %add.i.i = fadd float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %add.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <4 x float> @test_mm_maskz_add_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_add_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_add_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %add.i.i = fadd float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %add.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <2 x double> @test_mm_mask_add_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_add_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_add_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %add.i.i = fadd double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %add.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_add_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_add_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_add_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %add.i.i = fadd double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %add.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <4 x float> @test_mm_mask_sub_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_sub_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_sub_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5c,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %sub.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <4 x float> @test_mm_maskz_sub_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_sub_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_sub_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5c,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %sub.i.i = fsub float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %sub.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <2 x double> @test_mm_mask_sub_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_sub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_sub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5c,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %sub.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_sub_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_sub_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_sub_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5c,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %sub.i.i = fsub double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %sub.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <4 x float> @test_mm_mask_mul_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_mul_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_mul_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x59,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <4 x float> %__W, i32 0
  %cond.i = select i1 %tobool.i, float %vecext1.i, float %mul.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <4 x float> @test_mm_maskz_mul_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_mul_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_mul_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <4 x float> %__B, i32 0
  %vecext1.i.i = extractelement <4 x float> %__A, i32 0
  %mul.i.i = fmul float %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, float 0.000000e+00, float %mul.i.i
  %vecins.i = insertelement <4 x float> %__A, float %cond.i, i32 0
  ret <4 x float> %vecins.i
}
define <2 x double> @test_mm_mask_mul_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_mul_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_mul_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x59,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %vecext1.i = extractelement <2 x double> %__W, i32 0
  %cond.i = select i1 %tobool.i, double %vecext1.i, double %mul.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}
define <2 x double> @test_mm_maskz_mul_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_mul_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_mul_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x59,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %vecext.i.i = extractelement <2 x double> %__B, i32 0
  %vecext1.i.i = extractelement <2 x double> %__A, i32 0
  %mul.i.i = fmul double %vecext1.i.i, %vecext.i.i
  %0 = and i8 %__U, 1
  %tobool.i = icmp eq i8 %0, 0
  %cond.i = select i1 %tobool.i, double 0.000000e+00, double %mul.i.i
  %vecins.i = insertelement <2 x double> %__A, double %cond.i, i32 0
  ret <2 x double> %vecins.i
}

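; The div tests use a different mask pattern in IR: the i8 mask is bitcast to
; <8 x i1> and bit 0 is extracted, rather than tested with an 'and' plus icmp.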
define <4 x float> @test_mm_mask_div_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_mask_div_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_div_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x5e,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %2 = extractelement <4 x float> %__W, i64 0
  %3 = fdiv float %0, %1
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = extractelement <8 x i1> %4, i64 0
  %6 = select i1 %5, float %3, float %2
  %7 = insertelement <4 x float> %__A, float %6, i64 0
  ret <4 x float> %7
}
define <4 x float> @test_mm_maskz_div_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
; X86-LABEL: test_mm_maskz_div_ss:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_div_ss:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x5e,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <4 x float> %__A, i64 0
  %1 = extractelement <4 x float> %__B, i64 0
  %2 = fdiv float %0, %1
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = extractelement <8 x i1> %3, i64 0
  %5 = select i1 %4, float %2, float 0.000000e+00
  %6 = insertelement <4 x float> %__A, float %5, i64 0
  ret <4 x float> %6
}
define <2 x double> @test_mm_mask_div_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_mask_div_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_mask_div_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x5e,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %2 = extractelement <2 x double> %__W, i64 0
  %3 = fdiv double %0, %1
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = extractelement <8 x i1> %4, i64 0
  %6 = select i1 %5, double %3, double %2
  %7 = insertelement <2 x double> %__A, double %6, i64 0
  ret <2 x double> %7
}
define <2 x double> @test_mm_maskz_div_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
; X86-LABEL: test_mm_maskz_div_sd:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mm_maskz_div_sd:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x5e,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
entry:
  %0 = extractelement <2 x double> %__A, i64 0
  %1 = extractelement <2 x double> %__B, i64 0
  %2 = fdiv double %0, %1
  %3 = bitcast i8 %__U to <8 x i1>
  %4 = extractelement <8 x i1> %3, i64 0
  %5 = select i1 %4, double %2, double 0.000000e+00
  %6 = insertelement <2 x double> %__A, double %5, i64 0
  ret <2 x double> %6
}
declare <8 x double> @llvm.fma.v8f64(<8 x double>, <8 x double>, <8 x double>) #9
declare <16 x float> @llvm.fma.v16f32(<16 x float>, <16 x float>, <16 x float>) #9
declare float @llvm.fma.f32(float, float, float) #9
declare double @llvm.fma.f64(double, double, double) #9