1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=AVX2
4 ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=SKX
6 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
; Unsigned saturating byte add written as generic IR: add the operands, compare
; the first operand against the (possibly wrapped) sum, and select all-ones in
; the lanes that overflowed. Each run configuration is expected to emit a
; single (v)paddusb for this sequence.
9 define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) {
10 ; SSE-LABEL: test_x86_sse2_paddus_b:
12 ; SSE-NEXT: paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
13 ; SSE-NEXT: retl ## encoding: [0xc3]
15 ; AVX2-LABEL: test_x86_sse2_paddus_b:
17 ; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
18 ; AVX2-NEXT: retl ## encoding: [0xc3]
20 ; SKX-LABEL: test_x86_sse2_paddus_b:
22 ; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
23 ; SKX-NEXT: retl ## encoding: [0xc3]
; %a0 ugt (%a0 + %a1) holds exactly when the unsigned add wrapped around.
; Wrapped lanes become i8 -1 (all ones, the unsigned maximum); the other
; lanes keep the plain sum.
24 %1 = add <16 x i8> %a0, %a1
25 %2 = icmp ugt <16 x i8> %a0, %1
26 %3 = select <16 x i1> %2, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <16 x i8> %1
; Unsigned saturating 16-bit add written as generic IR (add, icmp ugt against
; the sum, select all-ones on overflow). Each run configuration is expected to
; emit a single (v)paddusw for this sequence.
30 define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) {
31 ; SSE-LABEL: test_x86_sse2_paddus_w:
33 ; SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
34 ; SSE-NEXT: retl ## encoding: [0xc3]
36 ; AVX2-LABEL: test_x86_sse2_paddus_w:
38 ; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
39 ; AVX2-NEXT: retl ## encoding: [0xc3]
41 ; SKX-LABEL: test_x86_sse2_paddus_w:
43 ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
44 ; SKX-NEXT: retl ## encoding: [0xc3]
; The compare is true only in lanes where the unsigned sum wrapped; those
; lanes are replaced by i16 -1 (all ones), the rest keep the sum.
45 %1 = add <8 x i16> %a0, %a1
46 %2 = icmp ugt <8 x i16> %a0, %1
47 %3 = select <8 x i1> %2, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %1
; Unsigned saturating byte subtract written as generic IR: take the unsigned
; maximum of the two operands (icmp ugt + select) and subtract the second
; operand from it. Each run configuration is expected to emit a single
; (v)psubusb for this sequence.
51 define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) {
52 ; SSE-LABEL: test_x86_sse2_psubus_b:
54 ; SSE-NEXT: psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
55 ; SSE-NEXT: retl ## encoding: [0xc3]
57 ; AVX2-LABEL: test_x86_sse2_psubus_b:
59 ; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
60 ; AVX2-NEXT: retl ## encoding: [0xc3]
62 ; SKX-LABEL: test_x86_sse2_psubus_b:
64 ; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
65 ; SKX-NEXT: retl ## encoding: [0xc3]
; %sel is umax(%a0, %a1), so %sel - %a1 is %a0 - %a1 where %a0 is larger and
; 0 otherwise; the subtraction can never wrap below zero.
66 %cmp = icmp ugt <16 x i8> %a0, %a1
67 %sel = select <16 x i1> %cmp, <16 x i8> %a0, <16 x i8> %a1
68 %sub = sub <16 x i8> %sel, %a1
; Unsigned saturating 16-bit subtract written as generic IR: unsigned maximum
; of the operands (icmp ugt + select) minus the second operand. Each run
; configuration is expected to emit a single (v)psubusw for this sequence.
72 define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) {
73 ; SSE-LABEL: test_x86_sse2_psubus_w:
75 ; SSE-NEXT: psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
76 ; SSE-NEXT: retl ## encoding: [0xc3]
78 ; AVX2-LABEL: test_x86_sse2_psubus_w:
80 ; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
81 ; AVX2-NEXT: retl ## encoding: [0xc3]
83 ; SKX-LABEL: test_x86_sse2_psubus_w:
85 ; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
86 ; SKX-NEXT: retl ## encoding: [0xc3]
; %sel is umax(%a0, %a1); subtracting %a1 yields %a0 - %a1 when %a0 is larger
; and 0 otherwise, i.e. the difference clamped at zero.
87 %cmp = icmp ugt <8 x i16> %a0, %a1
88 %sel = select <8 x i1> %cmp, <8 x i16> %a0, <8 x i16> %a1
89 %sub = sub <8 x i16> %sel, %a1
; Same unsigned saturating byte add pattern as the 16-lane test, but on a
; 64-bit <8 x i8> vector. The checks expect the same xmm-register (v)paddusb.
; NOTE(review): presumably the narrow vector is widened to 128 bits during
; type legalization - confirm against the backend.
93 define <8 x i8> @test_x86_sse2_paddus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
94 ; SSE-LABEL: test_x86_sse2_paddus_b_64:
96 ; SSE-NEXT: paddusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdc,0xc1]
97 ; SSE-NEXT: retl ## encoding: [0xc3]
99 ; AVX2-LABEL: test_x86_sse2_paddus_b_64:
101 ; AVX2-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdc,0xc1]
102 ; AVX2-NEXT: retl ## encoding: [0xc3]
104 ; SKX-LABEL: test_x86_sse2_paddus_b_64:
106 ; SKX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
107 ; SKX-NEXT: retl ## encoding: [0xc3]
; Lanes where %a0 ugt the sum have wrapped and are replaced by i8 -1 (all
; ones); the other lanes keep the sum.
108 %1 = add <8 x i8> %a0, %a1
109 %2 = icmp ugt <8 x i8> %a0, %1
110 %3 = select <8 x i1> %2, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i8> %1
; Same unsigned saturating 16-bit add pattern as the 8-lane test, but on a
; 64-bit <4 x i16> vector. The checks expect the same xmm-register (v)paddusw.
; NOTE(review): presumably the narrow vector is widened to 128 bits during
; type legalization - confirm against the backend.
114 define <4 x i16> @test_x86_sse2_paddus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
115 ; SSE-LABEL: test_x86_sse2_paddus_w_64:
117 ; SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
118 ; SSE-NEXT: retl ## encoding: [0xc3]
120 ; AVX2-LABEL: test_x86_sse2_paddus_w_64:
122 ; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
123 ; AVX2-NEXT: retl ## encoding: [0xc3]
125 ; SKX-LABEL: test_x86_sse2_paddus_w_64:
127 ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
128 ; SKX-NEXT: retl ## encoding: [0xc3]
; Lanes where %a0 ugt the sum have wrapped and are replaced by i16 -1 (all
; ones); the other lanes keep the sum.
129 %1 = add <4 x i16> %a0, %a1
130 %2 = icmp ugt <4 x i16> %a0, %1
131 %3 = select <4 x i1> %2, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> %1
; Same unsigned saturating byte subtract pattern as the 16-lane test, but on a
; 64-bit <8 x i8> vector. The checks expect the same xmm-register (v)psubusb.
; NOTE(review): presumably the narrow vector is widened to 128 bits during
; type legalization - confirm against the backend.
135 define <8 x i8> @test_x86_sse2_psubus_b_64(<8 x i8> %a0, <8 x i8> %a1) {
136 ; SSE-LABEL: test_x86_sse2_psubus_b_64:
138 ; SSE-NEXT: psubusb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd8,0xc1]
139 ; SSE-NEXT: retl ## encoding: [0xc3]
141 ; AVX2-LABEL: test_x86_sse2_psubus_b_64:
143 ; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd8,0xc1]
144 ; AVX2-NEXT: retl ## encoding: [0xc3]
146 ; SKX-LABEL: test_x86_sse2_psubus_b_64:
148 ; SKX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
149 ; SKX-NEXT: retl ## encoding: [0xc3]
; %sel is umax(%a0, %a1); subtracting %a1 gives the difference clamped at 0.
150 %cmp = icmp ugt <8 x i8> %a0, %a1
151 %sel = select <8 x i1> %cmp, <8 x i8> %a0, <8 x i8> %a1
152 %sub = sub <8 x i8> %sel, %a1
; Same unsigned saturating 16-bit subtract pattern as the 8-lane test, but on
; a 64-bit <4 x i16> vector. The checks expect the same xmm-register
; (v)psubusw.
; NOTE(review): presumably the narrow vector is widened to 128 bits during
; type legalization - confirm against the backend.
156 define <4 x i16> @test_x86_sse2_psubus_w_64(<4 x i16> %a0, <4 x i16> %a1) {
157 ; SSE-LABEL: test_x86_sse2_psubus_w_64:
159 ; SSE-NEXT: psubusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd9,0xc1]
160 ; SSE-NEXT: retl ## encoding: [0xc3]
162 ; AVX2-LABEL: test_x86_sse2_psubus_w_64:
164 ; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd9,0xc1]
165 ; AVX2-NEXT: retl ## encoding: [0xc3]
167 ; SKX-LABEL: test_x86_sse2_psubus_w_64:
169 ; SKX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
170 ; SKX-NEXT: retl ## encoding: [0xc3]
; %sel is umax(%a0, %a1); subtracting %a1 gives the difference clamped at 0.
171 %cmp = icmp ugt <4 x i16> %a0, %a1
172 %sel = select <4 x i1> %cmp, <4 x i16> %a0, <4 x i16> %a1
173 %sub = sub <4 x i16> %sel, %a1
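177 ; This test has a normal add and a saturating add.
178 ; NOTE: The expected lowering is a paddw followed by a paddusw, which is what the checks below match. An earlier FIXME said a bad canonicalization prevented this; that note appears to be stale.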
179 define <8 x i16> @add_addusw(<8 x i16> %x, <8 x i16> %y, <8 x i16> %z) {
180 ; SSE-LABEL: add_addusw:
182 ; SSE-NEXT: paddw %xmm2, %xmm1 ## encoding: [0x66,0x0f,0xfd,0xca]
183 ; SSE-NEXT: paddusw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xdd,0xc1]
184 ; SSE-NEXT: retl ## encoding: [0xc3]
186 ; AVX2-LABEL: add_addusw:
188 ; AVX2-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0xfd,0xca]
189 ; AVX2-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdd,0xc1]
190 ; AVX2-NEXT: retl ## encoding: [0xc3]
192 ; SKX-LABEL: add_addusw:
194 ; SKX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xca]
195 ; SKX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
196 ; SKX-NEXT: retl ## encoding: [0xc3]
; %a is an ordinary wrapping add of %y and %z. %b = %x + %a is then checked
; for unsigned overflow by comparing the operand %a against the sum: lanes
; where %a ugt %b have wrapped and are replaced by i16 -1 (all ones).
197 %a = add <8 x i16> %y, %z
198 %b = add <8 x i16> %x, %a
199 %c = icmp ugt <8 x i16> %a, %b
200 %res = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %b