[PowerPC] Recommit r314244 with refactoring and off by default
[llvm-core.git] / test / CodeGen / X86 / fma-intrinsics-x86.ll
blob6b28d0c19cf144c5e5987d64aa24f43922e0fb34
1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
4 ; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
6 ; VFMADD
; Calls @llvm.x86.fma.vfmadd.ss with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213ss with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
7 define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
8 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ss:
9 ; CHECK-FMA:       # BB#0:
10 ; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
11 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
13 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ss:
14 ; CHECK-AVX512VL:       # BB#0:
15 ; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xc2]
16 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
18 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ss:
19 ; CHECK-FMA-WIN:       # BB#0:
20 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
21 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
22 ; CHECK-FMA-WIN-NEXT:    vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
23 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
24   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
25   ret <4 x float> %res
; Calls @llvm.x86.fma.vfmadd.ss with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfmadd213ss
; with %xmm1 as destination, followed by a vmovaps copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
28 define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
29 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_ss:
30 ; CHECK-FMA:       # BB#0:
31 ; CHECK-FMA-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xa9,0xca]
32 ; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
33 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
35 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_ss:
36 ; CHECK-AVX512VL:       # BB#0:
37 ; CHECK-AVX512VL-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xca]
38 ; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
39 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
41 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_ss:
42 ; CHECK-FMA-WIN:       # BB#0:
43 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
44 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
45 ; CHECK-FMA-WIN-NEXT:    vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
46 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
47   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
48   ret <4 x float> %res
; Declaration of the intrinsic called above.
50 declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfmadd.sd with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213sd with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
52 define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
53 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_sd:
54 ; CHECK-FMA:       # BB#0:
55 ; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
56 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
58 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_sd:
59 ; CHECK-AVX512VL:       # BB#0:
60 ; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xc2]
61 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
63 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_sd:
64 ; CHECK-FMA-WIN:       # BB#0:
65 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
66 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
67 ; CHECK-FMA-WIN-NEXT:    vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
68 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
69   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
70   ret <2 x double> %res
; Calls @llvm.x86.fma.vfmadd.sd with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfmadd213sd
; with %xmm1 as destination, followed by a vmovapd copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
73 define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
74 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_bac_sd:
75 ; CHECK-FMA:       # BB#0:
76 ; CHECK-FMA-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
77 ; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
78 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
80 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_bac_sd:
81 ; CHECK-AVX512VL:       # BB#0:
82 ; CHECK-AVX512VL-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xca]
83 ; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
84 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
86 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_bac_sd:
87 ; CHECK-FMA-WIN:       # BB#0:
88 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
89 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
90 ; CHECK-FMA-WIN-NEXT:    vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
91 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
92   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
93   ret <2 x double> %res
; Declaration of the intrinsic called above.
95 declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfmadd.ps with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213ps on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
97 define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
98 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps:
99 ; CHECK-FMA:       # BB#0:
100 ; CHECK-FMA-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
101 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
103 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps:
104 ; CHECK-AVX512VL:       # BB#0:
105 ; CHECK-AVX512VL-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
106 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
108 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps:
109 ; CHECK-FMA-WIN:       # BB#0:
110 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
111 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
112 ; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
113 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
114   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
115   ret <4 x float> %res
; Declaration of the intrinsic called above.
117 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfmadd.pd with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213pd on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
119 define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
120 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd:
121 ; CHECK-FMA:       # BB#0:
122 ; CHECK-FMA-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
123 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
125 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd:
126 ; CHECK-AVX512VL:       # BB#0:
127 ; CHECK-AVX512VL-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
128 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
130 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd:
131 ; CHECK-FMA-WIN:       # BB#0:
132 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
133 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
134 ; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
135 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
136   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
137   ret <2 x double> %res
; Declaration of the intrinsic called above.
139 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfmadd.ps.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213ps on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
141 define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
142 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_ps_256:
143 ; CHECK-FMA:       # BB#0:
144 ; CHECK-FMA-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
145 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
147 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_ps_256:
148 ; CHECK-AVX512VL:       # BB#0:
149 ; CHECK-AVX512VL-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
150 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
152 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_ps_256:
153 ; CHECK-FMA-WIN:       # BB#0:
154 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
155 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
156 ; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
157 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
158   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
159   ret <8 x float> %res
; Declaration of the intrinsic called above.
161 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Calls @llvm.x86.fma.vfmadd.pd.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmadd213pd on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
163 define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
164 ; CHECK-FMA-LABEL: test_x86_fma_vfmadd_pd_256:
165 ; CHECK-FMA:       # BB#0:
166 ; CHECK-FMA-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
167 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
169 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmadd_pd_256:
170 ; CHECK-AVX512VL:       # BB#0:
171 ; CHECK-AVX512VL-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
172 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
174 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmadd_pd_256:
175 ; CHECK-FMA-WIN:       # BB#0:
176 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
177 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
178 ; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
179 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
180   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
181   ret <4 x double> %res
; Declaration of the intrinsic called above.
183 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
185 ; VFMSUB
; Calls @llvm.x86.fma.vfmsub.ss with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213ss with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
186 define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
187 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ss:
188 ; CHECK-FMA:       # BB#0:
189 ; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xab,0xc2]
190 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
192 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ss:
193 ; CHECK-AVX512VL:       # BB#0:
194 ; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xab,0xc2]
195 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
197 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ss:
198 ; CHECK-FMA-WIN:       # BB#0:
199 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
200 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
201 ; CHECK-FMA-WIN-NEXT:    vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
202 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
203   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
204   ret <4 x float> %res
; Calls @llvm.x86.fma.vfmsub.ss with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfmsub213ss
; with %xmm1 as destination, followed by a vmovaps copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
207 define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
208 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_ss:
209 ; CHECK-FMA:       # BB#0:
210 ; CHECK-FMA-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xab,0xca]
211 ; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
212 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
214 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_ss:
215 ; CHECK-AVX512VL:       # BB#0:
216 ; CHECK-AVX512VL-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xab,0xca]
217 ; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
218 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
220 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_ss:
221 ; CHECK-FMA-WIN:       # BB#0:
222 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
223 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
224 ; CHECK-FMA-WIN-NEXT:    vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
225 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
226   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
227   ret <4 x float> %res
; Declaration of the intrinsic called above.
229 declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfmsub.sd with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213sd with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
231 define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
232 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_sd:
233 ; CHECK-FMA:       # BB#0:
234 ; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
235 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
237 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_sd:
238 ; CHECK-AVX512VL:       # BB#0:
239 ; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xab,0xc2]
240 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
242 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_sd:
243 ; CHECK-FMA-WIN:       # BB#0:
244 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
245 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
246 ; CHECK-FMA-WIN-NEXT:    vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
247 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
248   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
249   ret <2 x double> %res
; Calls @llvm.x86.fma.vfmsub.sd with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfmsub213sd
; with %xmm1 as destination, followed by a vmovapd copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
252 define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
253 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_bac_sd:
254 ; CHECK-FMA:       # BB#0:
255 ; CHECK-FMA-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xab,0xca]
256 ; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
257 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
259 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_bac_sd:
260 ; CHECK-AVX512VL:       # BB#0:
261 ; CHECK-AVX512VL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xab,0xca]
262 ; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
263 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
265 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_bac_sd:
266 ; CHECK-FMA-WIN:       # BB#0:
267 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
268 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
269 ; CHECK-FMA-WIN-NEXT:    vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
270 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
271   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
272   ret <2 x double> %res
; Declaration of the intrinsic called above.
274 declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfmsub.ps with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213ps on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
276 define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
277 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps:
278 ; CHECK-FMA:       # BB#0:
279 ; CHECK-FMA-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
280 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
282 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps:
283 ; CHECK-AVX512VL:       # BB#0:
284 ; CHECK-AVX512VL-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaa,0xc2]
285 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
287 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps:
288 ; CHECK-FMA-WIN:       # BB#0:
289 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
290 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
291 ; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
292 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
293   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
294   ret <4 x float> %res
; Declaration of the intrinsic called above.
296 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfmsub.pd with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213pd on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
298 define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
299 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd:
300 ; CHECK-FMA:       # BB#0:
301 ; CHECK-FMA-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
302 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
304 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd:
305 ; CHECK-AVX512VL:       # BB#0:
306 ; CHECK-AVX512VL-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaa,0xc2]
307 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
309 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd:
310 ; CHECK-FMA-WIN:       # BB#0:
311 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
312 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
313 ; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
314 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
315   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
316   ret <2 x double> %res
; Declaration of the intrinsic called above.
318 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfmsub.ps.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213ps on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
320 define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
321 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_ps_256:
322 ; CHECK-FMA:       # BB#0:
323 ; CHECK-FMA-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
324 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
326 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_ps_256:
327 ; CHECK-AVX512VL:       # BB#0:
328 ; CHECK-AVX512VL-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xaa,0xc2]
329 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
331 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_ps_256:
332 ; CHECK-FMA-WIN:       # BB#0:
333 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
334 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
335 ; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
336 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
337   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
338   ret <8 x float> %res
; Declaration of the intrinsic called above.
340 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Calls @llvm.x86.fma.vfmsub.pd.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfmsub213pd on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
342 define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
343 ; CHECK-FMA-LABEL: test_x86_fma_vfmsub_pd_256:
344 ; CHECK-FMA:       # BB#0:
345 ; CHECK-FMA-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
346 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
348 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsub_pd_256:
349 ; CHECK-AVX512VL:       # BB#0:
350 ; CHECK-AVX512VL-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xaa,0xc2]
351 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
353 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsub_pd_256:
354 ; CHECK-FMA-WIN:       # BB#0:
355 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
356 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
357 ; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
358 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
359   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
360   ret <4 x double> %res
; Declaration of the intrinsic called above.
362 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
364 ; VFNMADD
; Calls @llvm.x86.fma.vfnmadd.ss with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213ss with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
365 define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
366 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ss:
367 ; CHECK-FMA:       # BB#0:
368 ; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xad,0xc2]
369 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
371 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ss:
372 ; CHECK-AVX512VL:       # BB#0:
373 ; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xad,0xc2]
374 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
376 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ss:
377 ; CHECK-FMA-WIN:       # BB#0:
378 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
379 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
380 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
381 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
382   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
383   ret <4 x float> %res
; Calls @llvm.x86.fma.vfnmadd.ss with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfnmadd213ss
; with %xmm1 as destination, followed by a vmovaps copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
386 define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
387 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_ss:
388 ; CHECK-FMA:       # BB#0:
389 ; CHECK-FMA-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xad,0xca]
390 ; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
391 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
393 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_ss:
394 ; CHECK-AVX512VL:       # BB#0:
395 ; CHECK-AVX512VL-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xad,0xca]
396 ; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
397 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
399 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_ss:
400 ; CHECK-FMA-WIN:       # BB#0:
401 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
402 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
403 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
404 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
405   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
406   ret <4 x float> %res
; Declaration of the intrinsic called above.
408 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfnmadd.sd with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213sd with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
410 define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
411 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_sd:
412 ; CHECK-FMA:       # BB#0:
413 ; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
414 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
416 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_sd:
417 ; CHECK-AVX512VL:       # BB#0:
418 ; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xad,0xc2]
419 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
421 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_sd:
422 ; CHECK-FMA-WIN:       # BB#0:
423 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
424 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
425 ; CHECK-FMA-WIN-NEXT:    vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
426 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
427   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
428   ret <2 x double> %res
; Calls @llvm.x86.fma.vfnmadd.sd with the first two operands swapped,
; (%a1, %a0, %a2), and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): vfnmadd213sd
; with %xmm1 as destination, followed by a vmovapd copy of %xmm1 into %xmm0.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
431 define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
432 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_bac_sd:
433 ; CHECK-FMA:       # BB#0:
434 ; CHECK-FMA-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xad,0xca]
435 ; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
436 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
438 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_bac_sd:
439 ; CHECK-AVX512VL:       # BB#0:
440 ; CHECK-AVX512VL-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xad,0xca]
441 ; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
442 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
444 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_bac_sd:
445 ; CHECK-FMA-WIN:       # BB#0:
446 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
447 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
448 ; CHECK-FMA-WIN-NEXT:    vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
449 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
450   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
451   ret <2 x double> %res
; Declaration of the intrinsic called above.
453 declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfnmadd.ps with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213ps on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
455 define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
456 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps:
457 ; CHECK-FMA:       # BB#0:
458 ; CHECK-FMA-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xac,0xc2]
459 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
461 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps:
462 ; CHECK-AVX512VL:       # BB#0:
463 ; CHECK-AVX512VL-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
464 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
466 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps:
467 ; CHECK-FMA-WIN:       # BB#0:
468 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
469 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
470 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
471 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
472   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
473   ret <4 x float> %res
; Declaration of the intrinsic called above.
475 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
; Calls @llvm.x86.fma.vfnmadd.pd with operands (%a0, %a1, %a2) and returns the
; result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213pd on %xmm registers with %xmm0 as destination.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rcx), %xmm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
477 define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
478 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd:
479 ; CHECK-FMA:       # BB#0:
480 ; CHECK-FMA-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
481 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
483 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd:
484 ; CHECK-AVX512VL:       # BB#0:
485 ; CHECK-AVX512VL-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
486 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
488 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd:
489 ; CHECK-FMA-WIN:       # BB#0:
490 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
491 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
492 ; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
493 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
494   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
495   ret <2 x double> %res
; Declaration of the intrinsic called above.
497 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
; Calls @llvm.x86.fma.vfnmadd.ps.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213ps on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
499 define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
500 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_ps_256:
501 ; CHECK-FMA:       # BB#0:
502 ; CHECK-FMA-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xac,0xc2]
503 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
505 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_ps_256:
506 ; CHECK-AVX512VL:       # BB#0:
507 ; CHECK-AVX512VL-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
508 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
510 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_ps_256:
511 ; CHECK-FMA-WIN:       # BB#0:
512 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
513 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
514 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
515 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
516   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
517   ret <8 x float> %res
; Declaration of the intrinsic called above.
519 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Calls @llvm.x86.fma.vfnmadd.pd.256 with operands (%a0, %a1, %a2) and returns
; the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmadd213pd on %ymm registers with %ymm0 as destination.
; Expected output, x86_64-pc-windows: %ymm1 is loaded from (%rcx), %ymm0 from
; (%rdx), and the FMA reads its remaining operand directly from (%r8).
521 define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
522 ; CHECK-FMA-LABEL: test_x86_fma_vfnmadd_pd_256:
523 ; CHECK-FMA:       # BB#0:
524 ; CHECK-FMA-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
525 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
527 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmadd_pd_256:
528 ; CHECK-AVX512VL:       # BB#0:
529 ; CHECK-AVX512VL-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
530 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
532 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmadd_pd_256:
533 ; CHECK-FMA-WIN:       # BB#0:
534 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
535 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
536 ; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
537 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
538   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
539   ret <4 x double> %res
; Declaration of the intrinsic called above.
541 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
543 ; VFNMSUB
; Calls @llvm.x86.fma.vfnmsub.ss with its operands in argument order
; (%a0, %a1, %a2) and returns the result.
; Expected output, x86_64-unknown-unknown (+fma and +avx512vl): one
; register-form vfnmsub213ss with %xmm0 as destination, then retq.
; Expected output, x86_64-pc-windows: %xmm1 is loaded from (%rdx), %xmm0 from
; (%rcx), and the FMA reads its remaining operand directly from (%r8).
544 define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
545 ; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ss:
546 ; CHECK-FMA:       # BB#0:
547 ; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
548 ; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
550 ; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ss:
551 ; CHECK-AVX512VL:       # BB#0:
552 ; CHECK-AVX512VL-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xaf,0xc2]
553 ; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
555 ; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ss:
556 ; CHECK-FMA-WIN:       # BB#0:
557 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x0a]
558 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
559 ; CHECK-FMA-WIN-NEXT:    vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
560 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
561   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
562   ret <4 x float> %res
; Scalar <4 x float> VFNMSUB with the first two operands swapped at the call
; site (%a1, %a0, %a2). The FMA and AVX512VL runs expect the result to be
; formed in %xmm1 and then copied into %xmm0 with vmovaps. The Windows run
; expects no extra copy: the operands are loaded through %rcx/%rdx and the
; third is folded as the (%r8) memory operand.
define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-FMA-NEXT:    vmovaps %xmm1, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xaf,0xca]
; CHECK-AVX512VL-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_ss:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
; Scalar <2 x double> VFNMSUB with the operands passed in their natural order
; (%a0, %a1, %a2). The FMA and AVX512VL runs expect a single register-form
; vfnmsub213sd that writes %xmm0. The Windows run expects two operands to be
; loaded through %rcx/%rdx and the third folded as the (%r8) memory operand.
define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xaf,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_sd:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x0a]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
; CHECK-FMA-WIN-NEXT:    vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}
; Scalar <2 x double> VFNMSUB with the first two operands swapped at the call
; site (%a1, %a0, %a2). The FMA and AVX512VL runs expect the result to be
; formed in %xmm1 and then copied into %xmm0 with vmovapd. The Windows run
; expects no extra copy: the operands are loaded through %rcx/%rdx and the
; third is folded as the (%r8) memory operand.
define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1 # encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
; CHECK-FMA-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xaf,0xca]
; CHECK-AVX512VL-NEXT:    vmovapd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc1]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_bac_sd:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
; Packed <4 x float> VFNMSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfnmsub213ps on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
; Packed <2 x double> VFNMSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfnmsub213pd on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
; Packed <8 x float> VFNMSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfnmsub213ps on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xae,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_ps_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Packed <4 x double> VFNMSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfnmsub213pd on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfnmsub_pd_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMADDSUB
; Packed <4 x float> VFMADDSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmaddsub213ps on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
; Packed <2 x double> VFMADDSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmaddsub213pd on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
; Packed <8 x float> VFMADDSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmaddsub213ps on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_ps_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Packed <4 x double> VFMADDSUB on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmaddsub213pd on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmaddsub_pd_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; VFMSUBADD
; Packed <4 x float> VFMSUBADD on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmsubadd213ps on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm1 # encoding: [0xc5,0xf8,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
; Packed <2 x double> VFMSUBADD on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmsubadd213pd on %xmm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0 # encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm1 # encoding: [0xc5,0xf9,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
  ret <2 x double> %res
}

declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
; Packed <8 x float> VFMSUBADD on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmsubadd213ps on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_ps_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %ymm1 # encoding: [0xc5,0xfc,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
; Packed <4 x double> VFMSUBADD on (%a0, %a1, %a2). The FMA and AVX512VL runs
; expect a single register-form vfmsubadd213pd on %ymm registers. The Windows
; run expects two operands to be loaded through %rcx/%rdx and the third folded
; as the (%r8) memory operand.
define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
; CHECK-FMA-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-FMA:       # BB#0:
; CHECK-FMA-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
; CHECK-FMA-NEXT:    retq # encoding: [0xc3]
;
; CHECK-AVX512VL-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-AVX512VL:       # BB#0:
; CHECK-AVX512VL-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa7,0xc2]
; CHECK-AVX512VL-NEXT:    retq # encoding: [0xc3]
;
; CHECK-FMA-WIN-LABEL: test_x86_fma_vfmsubadd_pd_256:
; CHECK-FMA-WIN:       # BB#0:
; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %ymm1 # encoding: [0xc5,0xfd,0x28,0x09]
; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
; CHECK-FMA-WIN-NEXT:    vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
; Attribute group referenced as #0 by the test function definitions in this file.
attributes #0 = { nounwind }