; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X86-SSE
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X86-AVX1
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X86-AVX512
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=SSE,X64-SSE
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX1,X64-AVX1
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=AVX,AVX512,X64-AVX512

define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_ssse3_phadd_d_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phaddd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x02,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phadd_d_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphaddd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x02,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_phadd_sw_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phaddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x03,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phadd_sw_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x03,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_phadd_w_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phaddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x01,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phadd_w_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x01,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_ssse3_phsub_d_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phsubd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x06,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phsub_d_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphsubd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x06,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_phsub_sw_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phsubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x07,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phsub_sw_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x07,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_phsub_w_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    phsubw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x05,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_phsub_w_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphsubw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x05,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaddubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x04,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x04,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; Make sure we don't commute this operation. pmaddubsw treats its first
; operand as unsigned bytes and its second as signed bytes, so folding the
; load of %a0 by swapping the operands would change the result.
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128_load_op0(ptr %ptr, <16 x i8> %a1) {
; X86-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08]
; X86-SSE-NEXT:    pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8]
; X86-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08]
; X86-AVX1-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08]
; X86-AVX512-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f]
; X64-SSE-NEXT:    pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8]
; X64-SSE-NEXT:    movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f]
; X64-AVX1-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f]
; X64-AVX512-NEXT:    vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a0 = load <16 x i8>, ptr %ptr
  %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}

define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmulhrsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_ssse3_pmul_hr_sw_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_ssse3_pshuf_b_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    pshufb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x00,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_ssse3_pshuf_b_128:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_ssse3_pshuf_b_128:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_ssse3_psign_b_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    psignb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x08,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_psign_b_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpsignb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x08,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_ssse3_psign_d_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    psignd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_psign_d_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpsignd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0a,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_ssse3_psign_w_128:
; SSE:       ## %bb.0:
; SSE-NEXT:    psignw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x09,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_ssse3_psign_w_128:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpsignw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x09,0xc1]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone